Testing Virtual Zarr with RiOMar and Icechunk#
Warning
This notebook is a test: it demonstrates that using VirtualiZarr with Icechunk on data hosted on Pangeo-EOSC does not currently work.
Notebook#
Justus Magin (author), CNRS-LOPS (France), @keewis
Bibliography and other interesting resources#
from functools import partial
import fsspec
import virtualizarr
import xarray as xr
# Base URL of the remote directory that holds the netCDF files.
inroot = "https://data-fair2adapt.ifremer.fr/riomar/GAMAR"

# fsspec filesystem for the "http" protocol, used to list the remote files.
fs = fsspec.filesystem("http")

# Every entry under the base URL that matches ``*.nc``.
urls = fs.glob(f"{inroot}/*.nc")
import distributed
# Start a local Dask cluster with two workers; the files are opened on it below.
cluster = distributed.LocalCluster(n_workers=2)
# Client bound to that cluster, used to submit and gather tasks.
client = cluster.get_client()
# Bare expression: displays the client when run as a notebook cell.
client
# Keyword arguments shared by every ``open_virtual_dataset`` call.
open_options = {
    # Reader backend used to parse the files.
    "backend": virtualizarr.readers.hdf.HDFVirtualBackend,
    # Empty mapping: no indexes are supplied to the opener.
    "indexes": {},
    # Variable names passed as ``loadable_variables``.
    "loadable_variables": [
        "time_counter",
        "time_instant",
        "x_rho",
        "y_rho",
        "x_u",
        "x_v",
        "y_u",
        "y_v",
        "axis_nbounds",
    ],
    "decode_times": True,
}

# Single-argument opener (takes only the file URL) so it can be mapped
# over the list of URLs.
func = partial(virtualizarr.open_virtual_dataset, **open_options)
# Open only the first two files, one task per URL, on the Dask cluster.
futures = client.map(func, urls[:2])
# Block until the tasks finish and collect their results locally.
dss = client.gather(futures)
# Bare expression: displays the list of opened datasets in a notebook cell.
dss
# Concatenate the per-file datasets along the ``time_counter`` dimension.
ds = xr.concat(
    dss,
    dim="time_counter",
    compat="override",
    coords="minimal",
    combine_attrs="drop_conflicts",
)

# Mark the two bounds variables as coordinates of the combined dataset.
ds = ds.set_coords(["time_counter_bounds", "time_instant_bounds"])

# Bare expression: displays the combined dataset in a notebook cell.
ds
import icechunk
# Icechunk storage on the local filesystem, rooted at "riomar.zarr".
storage = icechunk.local_filesystem_storage("riomar.zarr")
storage
# Open the repository at that location, creating it if it does not exist.
repo = icechunk.Repository.open_or_create(storage)
repo
# Writable session on the "main" branch; its store receives the references.
session = repo.writable_session("main")
store = session.store
store
# Write the combined virtual dataset into the Icechunk store.
ds.virtualize.to_icechunk(store)
# Commit the session with a message describing what was written.
session.commit("first two datasets")
# Re-open "main" read-only to check that the committed data can be read back.
# NOTE(review): reading virtual chunks back may require a virtual chunk
# container for the remote URL prefix to be configured on the repository;
# possibly related to the failure mentioned in the notebook warning. Confirm
# against the Icechunk documentation.
rs = repo.readonly_session(branch="main")
rs
xr.open_dataset(rs.store, engine="zarr", chunks={}, consolidated=False)