Testing Virtual Zarr with RiOMar and Icechunk

Testing Virtual Zarr with RiOMar and Icechunk#

Warning

This notebook is a test: it demonstrates that using VirtualiZarr with Icechunk currently does not work with data hosted on Pangeo-EOSC.

Notebook#

  • Justus Magin (author), CNRS-LOPS (France), @keewis

Bibliography and other interesting resources#

from functools import partial

import fsspec
import virtualizarr
import xarray as xr

# HTTP-backed fsspec filesystem, used below to list the remote files.
fs = fsspec.filesystem("http")
# Base URL of the remote directory that is searched for netCDF files.
inroot = "https://data-fair2adapt.ifremer.fr/riomar/GAMAR"
# URLs of the entries directly under `inroot` whose names end in ".nc".
# NOTE(review): globbing over HTTP presumably relies on the server returning
# a browsable directory listing — confirm this holds for this host.
urls = fs.glob(f"{inroot}/*.nc")
import distributed

# Start a local Dask cluster with two workers; the per-file opens further
# down are mapped onto it through `client`.
cluster = distributed.LocalCluster(n_workers=2)
# Client connected to that cluster.
client = cluster.get_client()
# Bare expression: shows the client's repr when it ends a notebook cell.
client
# Names passed as `loadable_variables` to the opener below.
# NOTE(review): in VirtualiZarr these are presumably read into memory as real
# arrays instead of being kept as virtual chunk references — confirm against
# the installed version's documentation.
eager_variables = [
    "time_counter",
    "time_instant",
    "x_rho",
    "y_rho",
    "x_u",
    "x_v",
    "y_u",
    "y_v",
    "axis_nbounds",
]

# Keyword arguments shared by every per-file open.
open_options = {
    "backend": virtualizarr.readers.hdf.HDFVirtualBackend,
    # NOTE(review): an empty mapping presumably disables index creation — confirm.
    "indexes": {},
    "loadable_variables": eager_variables,
    "decode_times": True,
}

# Single-argument opener: only the file URL is left to supply.
func = partial(virtualizarr.open_virtual_dataset, **open_options)

# Open only the first two files on the cluster, then collect the results
# back into this process.
futures = client.map(func, urls[:2])
dss = client.gather(futures)
# Bare expression: shows the gathered datasets when it ends a notebook cell.
dss
# Options for joining the per-file datasets along the time dimension.
concat_options = {
    "dim": "time_counter",
    "compat": "override",
    "coords": "minimal",
    "combine_attrs": "drop_conflicts",
}
# Variables that are turned into coordinates after the concatenation.
bounds_variables = ["time_counter_bounds", "time_instant_bounds"]

# Concatenate first, then mark the bounds variables as coordinates.
ds = xr.concat(dss, **concat_options)
ds = ds.set_coords(bounds_variables)
# Bare expression: shows the combined dataset when it ends a notebook cell.
ds
import icechunk
# Icechunk storage backed by the local path "riomar.zarr".
storage = icechunk.local_filesystem_storage("riomar.zarr")
storage
# Open the repository on that storage, or create it if it is not there yet.
repo = icechunk.Repository.open_or_create(storage)
repo
# Writable session on the "main" branch; `store` is the session's store
# object that the dataset is written to.
session = repo.writable_session("main")
store = session.store
store
# Write the combined virtual dataset into the session's store.
# NOTE(review): the repository is opened without any virtual chunk container
# configuration for the HTTP source. Icechunk may need one to resolve virtual
# references — confirm whether this is the failure the notebook's warning
# refers to.
ds.virtualize.to_icechunk(store)
# Commit the write to the branch with a descriptive message.
session.commit("first two datasets")
# Re-open the "main" branch read-only to check what was committed.
rs = repo.readonly_session(branch="main")
rs
# Read the committed data back through xarray's zarr engine. `chunks={}`
# requests dask-backed arrays, and `consolidated=False` tells xarray not to
# look for consolidated metadata.
xr.open_dataset(rs.store, engine="zarr", chunks={}, consolidated=False)