diff --git a/pyproject.toml b/pyproject.toml index c42f0f7..9a3f7cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dev = [ "mypy~=1.14", "pandas-stubs~=2.2.0", "pytest~=8.3", - "ruff~=0.8", + "ruff~=0.15", "types-requests~=2.32.0.20241016", "types-tqdm~=4.67.0.20250301", "universal-pathlib~=0.2.0", diff --git a/src/climatebenchpress/data_loader/__init__.py b/src/climatebenchpress/data_loader/__init__.py index 80a1a35..1a363c6 100644 --- a/src/climatebenchpress/data_loader/__init__.py +++ b/src/climatebenchpress/data_loader/__init__.py @@ -51,7 +51,7 @@ def open_downloaded_canonicalized_dataset( ds = canon.canonicalize_dataset(ds) with monitor.progress_bar(progress): - ds.to_zarr(standardized, encoding=dict(), compute=False).compute() + ds.to_zarr(standardized, compute=False).compute() return xr.open_dataset(standardized, chunks=dict(), engine="zarr") @@ -96,20 +96,9 @@ def open_downloaded_tiny_canonicalized_dataset( ds = canon.canonical_tiny_dataset(ds, slices=slices) # Rechunk the data because "tiny-fication" can lead to inconsistent or # suboptimal chunking. - ds = _rechunk_dataset(ds) + ds = ds.chunk(-1) with monitor.progress_bar(progress): - ds.to_zarr( - standardized, encoding=dict(), compute=False, consolidated=True - ).compute() + ds.to_zarr(standardized, compute=False, consolidated=True).compute() return xr.open_dataset(standardized, chunks=dict(), engine="zarr") - - -def _rechunk_dataset(ds: xr.Dataset) -> xr.Dataset: - rechunked = ds.copy() - for var_name in ds.data_vars: - if hasattr(ds[var_name].data, "chunks"): - rechunked[var_name] = ds[var_name].chunk("auto") - - return rechunked diff --git a/src/climatebenchpress/data_loader/datasets/cams.py b/src/climatebenchpress/data_loader/datasets/cams.py index 5f84631..c1e2e87 100644 --- a/src/climatebenchpress/data_loader/datasets/cams.py +++ b/src/climatebenchpress/data_loader/datasets/cams.py @@ -43,7 +43,11 @@ def download(download_path: Path, progress: bool = True): @staticmethod def open(download_path: Path) -> xr.Dataset: - ds = xr.open_dataset(download_path / Path(NO2_FILE).name).chunk(-1) + ds = ( + xr.open_dataset(download_path / Path(NO2_FILE).name) + .drop_encoding() + .chunk(-1) + ) # valid_time contains actual dates, whereas step is the seconds (in simulated time) # since the model as been initialised. diff --git a/src/climatebenchpress/data_loader/datasets/cmip6/abc.py b/src/climatebenchpress/data_loader/datasets/cmip6/abc.py index f4c8a31..61bdc1c 100644 --- a/src/climatebenchpress/data_loader/datasets/cmip6/abc.py +++ b/src/climatebenchpress/data_loader/datasets/cmip6/abc.py @@ -62,13 +62,13 @@ def download_with( ds = ds[variable_selector] with monitor.progress_bar(progress): - ds.to_zarr(downloadfile, mode="w", encoding=dict(), compute=False).compute() + ds.to_zarr(downloadfile, mode="w", compute=False).compute() donefile.touch() @staticmethod def open(download_path: Path) -> xr.Dataset: - return xr.open_zarr(download_path / "download.zarr") + return xr.open_zarr(download_path / "download.zarr").drop_encoding().chunk(-1) @lru_cache @staticmethod diff --git a/src/climatebenchpress/data_loader/datasets/era5.py b/src/climatebenchpress/data_loader/datasets/era5.py index 8de8583..caed1fb 100644 --- a/src/climatebenchpress/data_loader/datasets/era5.py +++ b/src/climatebenchpress/data_loader/datasets/era5.py @@ -44,18 +44,18 @@ def download(download_path: Path, progress: bool = True): "10m_u_component_of_wind", "10m_v_component_of_wind", ] - ].chunk(-1) + ] # Needed to make the dataset CF-compliant. ds.time.attrs["standard_name"] = "time" ds.longitude.attrs["axis"] = "X" ds.latitude.attrs["axis"] = "Y" with monitor.progress_bar(progress): - ds.to_zarr(downloadfile, mode="w", encoding=dict(), compute=False).compute() + ds.to_zarr(downloadfile, mode="w", compute=False).compute() donefile.touch() @staticmethod def open(download_path: Path) -> xr.Dataset: - return xr.open_zarr(download_path / "download.zarr") + return xr.open_zarr(download_path / "download.zarr").drop_encoding().chunk(-1) if __name__ == "__main__": diff --git a/src/climatebenchpress/data_loader/datasets/esa_biomass_cci.py b/src/climatebenchpress/data_loader/datasets/esa_biomass_cci.py index f05a887..12460d6 100644 --- a/src/climatebenchpress/data_loader/datasets/esa_biomass_cci.py +++ b/src/climatebenchpress/data_loader/datasets/esa_biomass_cci.py @@ -47,7 +47,7 @@ def download(download_path: Path, progress: bool = True): @staticmethod def open(download_path: Path) -> xr.Dataset: # Need string conversion for argument to be interpreted as a glob pattern. - ds = xr.open_mfdataset(str(download_path / "*.nc")) + ds = xr.open_mfdataset(str(download_path / "*.nc")).drop_encoding() # Needed to make the dataset CF-compliant. ds.lon.attrs["axis"] = "X" ds.lat.attrs["axis"] = "Y" diff --git a/src/climatebenchpress/data_loader/datasets/ifs_humidity.py b/src/climatebenchpress/data_loader/datasets/ifs_humidity.py index 8f51d5c..be9a18e 100644 --- a/src/climatebenchpress/data_loader/datasets/ifs_humidity.py +++ b/src/climatebenchpress/data_loader/datasets/ifs_humidity.py @@ -39,13 +39,11 @@ def download(download_path: Path, progress: bool = True): ) downloadfile = download_path / "ifs_humidity.zarr" with monitor.progress_bar(progress): - ds_regridded.to_zarr( - downloadfile, mode="w", encoding=dict(), compute=False - ).compute() + ds_regridded.to_zarr(downloadfile, mode="w", compute=False).compute() @staticmethod def open(download_path: Path) -> xr.Dataset: - ds = xr.open_dataset(download_path / "ifs_humidity.zarr") + ds = xr.open_zarr(download_path / "ifs_humidity.zarr").drop_encoding() num_levels = ds["level"].size ds = ds.isel(time=slice(0, 1)).chunk( { diff --git a/src/climatebenchpress/data_loader/datasets/ifs_uncompressed.py b/src/climatebenchpress/data_loader/datasets/ifs_uncompressed.py index 3e0a7d8..9e60e50 100644 --- a/src/climatebenchpress/data_loader/datasets/ifs_uncompressed.py +++ b/src/climatebenchpress/data_loader/datasets/ifs_uncompressed.py @@ -39,13 +39,15 @@ def download(download_path: Path, progress: bool = True): ) downloadfile = download_path / "ifs_uncompressed.zarr" with monitor.progress_bar(progress): - ds_regridded.to_zarr( - downloadfile, mode="w", encoding=dict(), compute=False - ).compute() + ds_regridded.to_zarr(downloadfile, mode="w", compute=False).compute() @staticmethod def open(download_path: Path) -> xr.Dataset: - ds = xr.open_dataset(download_path / "ifs_uncompressed.zarr") + ds = ( + xr.open_dataset(download_path / "ifs_uncompressed.zarr") + .drop_encoding() + .chunk(-1) + ) # Needed to make the dataset CF-compliant. ds.longitude.attrs["axis"] = "X" @@ -106,7 +108,11 @@ def load_hplp_data(leveltype=None, gridtype=None, step=None, remap=False): return xr.open_dataset( "reference://", engine="zarr", - backend_kwargs=dict(storage_options=dict(fo=ref, asynchronous=False)), + backend_kwargs=dict( + storage_options=dict( + fo=ref, asynchronous=False, remote_options=dict(ssl=False) + ) + ), consolidated=False, ) @@ -149,9 +155,9 @@ def regrid_to_regular(ds, in_grid, out_grid): out_data[var].append(r) dx = out_grid["grid"][0] - assert ( - out_grid["grid"][0] == out_grid["grid"][1] - ), "Only grids with equal latitude and longitude spacing are supported." + assert out_grid["grid"][0] == out_grid["grid"][1], ( + "Only grids with equal latitude and longitude spacing are supported." + ) lats = np.linspace(90, -90, int(180 / dx) + 1) lons = np.linspace(0, 360 - dx, int(360 / dx)) coords = { diff --git a/src/climatebenchpress/data_loader/datasets/nextgems.py b/src/climatebenchpress/data_loader/datasets/nextgems.py index 631f17c..0b9a522 100644 --- a/src/climatebenchpress/data_loader/datasets/nextgems.py +++ b/src/climatebenchpress/data_loader/datasets/nextgems.py @@ -71,12 +71,12 @@ def download(download_path: Path, progress: bool = True): ds.lat.attrs["axis"] = "Y" with monitor.progress_bar(progress): - ds.to_zarr(downloadfile, mode="w", encoding=dict(), compute=False).compute() + ds.to_zarr(downloadfile, mode="w", compute=False).compute() donefile.touch() @staticmethod def open(download_path: Path) -> xr.Dataset: - return xr.open_zarr(download_path / "download.zarr") + return xr.open_zarr(download_path / "download.zarr").drop_encoding().chunk(-1) def _get_nn_lon_lat_index(nside, lons, lats): diff --git a/tests/test_virtual.py b/tests/test_virtual.py index 3820b94..c3cc59b 100644 --- a/tests/test_virtual.py +++ b/tests/test_virtual.py @@ -40,7 +40,6 @@ def download(download_path: Path, progress: bool = True): ds.to_zarr( download_path / "download.zarr", mode="w", - encoding=dict(), compute=False, ).compute()