Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies = [
"numcodecs>=0.13.0,<0.17",
"numcodecs-combinators[xarray]~=0.2.13",
"numcodecs-observers~=0.1.2",
"numcodecs-random-projection==0.1.0a3",
"numcodecs-replace==0.1.0",
"numcodecs-safeguards==0.1.0b2",
"numcodecs-wasm==0.2.2",
Expand All @@ -25,6 +26,7 @@ dependencies = [
"numcodecs-wasm-round==0.5.0",
"numcodecs-wasm-sperr==0.2.0",
"numcodecs-wasm-stochastic-rounding==0.2.0",
"numcodecs-wasm-swizzle-reshape==0.4.0",
"numcodecs-wasm-sz3==0.7.0",
"numcodecs-wasm-tthresh==0.3.0",
"numcodecs-wasm-zfp==0.6.0",
Expand Down
8 changes: 8 additions & 0 deletions src/climatebenchpress/compressor/compressors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
"BitRound",
"BitRoundPco",
"Jpeg2000",
"RP",
"RPDct",
"SafeguardedBitRoundPco",
"SafeguardedRP",
"SafeguardedRPDct",
"SafeguardedSperr",
"SafeguardedSz3",
"SafeguardedZero",
Expand All @@ -21,8 +25,12 @@
from .bitround import BitRound
from .bitround_pco import BitRoundPco
from .jpeg2000 import Jpeg2000
from .rp import RP
from .rp_dct import RPDct
from .safeguarded import (
SafeguardedBitRoundPco,
SafeguardedRP,
SafeguardedRPDct,
SafeguardedSperr,
SafeguardedSz3,
SafeguardedZero,
Expand Down
25 changes: 25 additions & 0 deletions src/climatebenchpress/compressor/compressors/rp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
__all__ = ["RP"]

import numcodecs_random_projection
import numcodecs_wasm_swizzle_reshape
from numcodecs_combinators.framed import FramedCodecStack

from .abc import Compressor


class RP(Compressor):
    """Random-projection compressor using the ``"gaussian"`` method."""

    name = "rp"
    description = "Random Projection (Gaussian)"

    @staticmethod
    def abs_bound_codec(error_bound, **kwargs):
        """Build the codec stack for an absolute error bound.

        ``error_bound`` is handed to the random-projection codec as its
        ``mae`` argument. Extra keyword arguments are accepted but unused.

        Returns a ``FramedCodecStack`` that applies the swizzle-reshape
        step first and the random-projection codec second.
        """
        # NOTE(review): the axis grouping references axes 0..4, so this
        # presumably expects 5-dimensional input -- confirm with callers.
        reshape = numcodecs_wasm_swizzle_reshape.SwizzleReshape(
            axes=[[0, 1, 2], [3, 4]],
        )
        projection = numcodecs_random_projection.RPCodec(
            mae=error_bound,
            method="gaussian",
            seed=42,  # hard-coded seed for the projection
            max_block_memory=2**28,  # 256 MiB
            debug=True,
        )
        return FramedCodecStack(reshape, projection)
24 changes: 24 additions & 0 deletions src/climatebenchpress/compressor/compressors/rp_dct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
__all__ = ["RPDct"]

import numcodecs_random_projection
import numcodecs_wasm_swizzle_reshape
from numcodecs_combinators.framed import FramedCodecStack

from .abc import Compressor


class RPDct(Compressor):
    """Random-projection compressor using the ``"dct"`` method."""

    name = "rp-dct"
    description = "Random Projection (DCT)"

    @staticmethod
    def abs_bound_codec(error_bound, **kwargs):
        """Build the codec stack for an absolute error bound.

        ``error_bound`` is handed to the random-projection codec as its
        ``mae`` argument. Extra keyword arguments are accepted but unused.

        Returns a ``FramedCodecStack`` that applies the swizzle-reshape
        step first and the random-projection codec second.
        """
        # NOTE(review): the axis grouping references axes 0..4, so this
        # presumably expects 5-dimensional input -- confirm with callers.
        reshape = numcodecs_wasm_swizzle_reshape.SwizzleReshape(
            axes=[[0, 1, 2], [3, 4]],
        )
        projection = numcodecs_random_projection.RPCodec(
            mae=error_bound,
            method="dct",
            max_block_memory=2**28,  # 256 MiB
            debug=True,
        )
        return FramedCodecStack(reshape, projection)
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
__all__ = [
"SafeguardedBitRoundPco",
"SafeguardedRP",
"SafeguardedRPDct",
"SafeguardedSperr",
"SafeguardedSz3",
"SafeguardedZero",
Expand All @@ -8,6 +10,8 @@
]

from .bitround_pco import SafeguardedBitRoundPco
from .rp import SafeguardedRP
from .rp_dct import SafeguardedRPDct
from .sperr import SafeguardedSperr
from .sz3 import SafeguardedSz3
from .zero import SafeguardedZero
Expand Down
56 changes: 56 additions & 0 deletions src/climatebenchpress/compressor/compressors/safeguarded/rp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
__all__ = ["SafeguardedRP"]

import numcodecs_random_projection
import numcodecs_safeguards
import numcodecs_wasm_swizzle_reshape
from numcodecs_combinators.framed import FramedCodecStack

from ..abc import Compressor


class SafeguardedRP(Compressor):
    """Gaussian random-projection codec wrapped in error-bound safeguards."""

    name = "safeguarded-rp"
    description = "Safeguarded(RP[Gaussian])"

    @staticmethod
    def abs_bound_codec(error_bound, **kwargs):
        """Build the safeguarded codec for an absolute error bound.

        The inner stack (swizzle-reshape, then random projection) receives
        ``error_bound`` as its ``mae``; the same value is passed to an
        ``"abs"`` error-bound safeguard. Extra keyword arguments are
        accepted but unused.
        """
        reshape = numcodecs_wasm_swizzle_reshape.SwizzleReshape(
            axes=[[0, 1, 2], [3, 4]],
        )
        projection = numcodecs_random_projection.RPCodec(
            mae=error_bound,
            method="gaussian",
            seed=42,  # hard-coded seed for the projection
            max_block_memory=2**28,  # 256 MiB
            debug=True,
        )
        inner = FramedCodecStack(reshape, projection)

        abs_safeguard = {
            "kind": "eb",
            "type": "abs",
            "eb": error_bound,
            "equal_nan": True,
        }
        return numcodecs_safeguards.SafeguardedCodec(
            codec=inner,
            safeguards=[abs_safeguard],
        )

    @staticmethod
    def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs):
        """Build the safeguarded codec for a relative error bound.

        ``data_abs_min`` is required despite its ``None`` default; an
        ``AssertionError`` is raised when it is missing. Extra keyword
        arguments are accepted but unused.
        """
        assert data_abs_min is not None, "data_abs_min must be provided"

        reshape = numcodecs_wasm_swizzle_reshape.SwizzleReshape(
            axes=[[0, 1, 2], [3, 4]],
        )
        projection = numcodecs_random_projection.RPCodec(
            # Conservative rel->abs error bound transformation (same as
            # convert_rel_error_to_abs_error) for the inner codec only; the
            # safeguards below are informed of the relative bound directly.
            mae=error_bound * data_abs_min,
            method="gaussian",
            seed=42,  # hard-coded seed for the projection
            max_block_memory=2**28,  # 256 MiB
            debug=True,
        )
        inner = FramedCodecStack(reshape, projection)

        rel_safeguard = {
            "kind": "eb",
            "type": "rel",
            "eb": error_bound,
            "equal_nan": True,
        }
        return numcodecs_safeguards.SafeguardedCodec(
            codec=inner,
            safeguards=[rel_safeguard],
        )
54 changes: 54 additions & 0 deletions src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
__all__ = ["SafeguardedRPDct"]

import numcodecs_random_projection
import numcodecs_safeguards
import numcodecs_wasm_swizzle_reshape
from numcodecs_combinators.framed import FramedCodecStack

from ..abc import Compressor


class SafeguardedRPDct(Compressor):
    """DCT random-projection codec wrapped in error-bound safeguards."""

    name = "safeguarded-rp-dct"
    description = "Safeguarded(RP[DCT])"

    @staticmethod
    def abs_bound_codec(error_bound, **kwargs):
        """Build the safeguarded codec for an absolute error bound.

        The inner stack (swizzle-reshape, then random projection) receives
        ``error_bound`` as its ``mae``; the same value is passed to an
        ``"abs"`` error-bound safeguard. Extra keyword arguments are
        accepted but unused.
        """
        reshape = numcodecs_wasm_swizzle_reshape.SwizzleReshape(
            axes=[[0, 1, 2], [3, 4]],
        )
        projection = numcodecs_random_projection.RPCodec(
            mae=error_bound,
            method="dct",
            max_block_memory=2**28,  # 256 MiB
            debug=True,
        )
        inner = FramedCodecStack(reshape, projection)

        abs_safeguard = {
            "kind": "eb",
            "type": "abs",
            "eb": error_bound,
            "equal_nan": True,
        }
        return numcodecs_safeguards.SafeguardedCodec(
            codec=inner,
            safeguards=[abs_safeguard],
        )

    @staticmethod
    def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs):
        """Build the safeguarded codec for a relative error bound.

        ``data_abs_min`` is required despite its ``None`` default; an
        ``AssertionError`` is raised when it is missing. Extra keyword
        arguments are accepted but unused.
        """
        assert data_abs_min is not None, "data_abs_min must be provided"

        reshape = numcodecs_wasm_swizzle_reshape.SwizzleReshape(
            axes=[[0, 1, 2], [3, 4]],
        )
        projection = numcodecs_random_projection.RPCodec(
            # Conservative rel->abs error bound transformation (same as
            # convert_rel_error_to_abs_error) for the inner codec only; the
            # safeguards below are informed of the relative bound directly.
            mae=error_bound * data_abs_min,
            method="dct",
            max_block_memory=2**28,  # 256 MiB
            debug=True,
        )
        inner = FramedCodecStack(reshape, projection)

        rel_safeguard = {
            "kind": "eb",
            "type": "rel",
            "eb": error_bound,
            "equal_nan": True,
        }
        return numcodecs_safeguards.SafeguardedCodec(
            codec=inner,
            safeguards=[rel_safeguard],
        )
4 changes: 1 addition & 3 deletions src/climatebenchpress/compressor/scripts/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,7 @@ def compress(
json.dump(measurements, f)

with progress_bar(progress):
ds_new.to_zarr(
compressed_dataset_path, encoding=dict(), compute=False
).compute()
ds_new.to_zarr(compressed_dataset_path, compute=False).compute()


def compress_decompress(
Expand Down