Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Création d’un catalogue .zarr de l’archive GAMAR, avec virtualizarr / icechunk

  • Basé sur ls ~fpaul/tmp/create_virtualizarr_v2_riomar_icechunk

from IPython.core.magic import register_cell_magic


@register_cell_magic
def skip(line, cell):
    """Cell magic ``%%skip``: discard the cell body without running it.

    Both the text following ``%%skip`` on the magic line (``line``) and the
    cell content (``cell``) are ignored, so anything after ``%%skip`` is
    free to serve as a label.
    """
    return None
import os
from functools import partial

import xarray as xr

Initialisation Dask

import distributed

# Start a local Dask cluster with default settings and obtain a client
# connected to it; the client is used further down to open files in parallel.
cluster = distributed.LocalCluster()
client = cluster.get_client()
# Last expression of the cell: displays the client in the notebook.
client
/opt/miniforge/lib/python3.11/site-packages/distributed/node.py:188: UserWarning: Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 46359 instead
  warnings.warn(
Loading...
import glob

from obstore.store import LocalStore
from virtualizarr import open_virtual_dataset
from virtualizarr.registry import ObjectStoreRegistry

# Root under which file:// URLs are mapped to the local object store.
# NOTE: `base_path` is reassigned afterwards to the GAMAR data directory;
# the value set here is only used to build the registry key.
# base_path = '/scale/project/lops-oh-fair2adapt/riomar/GAMAR/'
base_path = "/scale/project/lops-oh-fair2adapt/"

local_store = LocalStore()
# Registry entry: every URL starting with "file://<base_path>" is served by
# `local_store`.
registry = ObjectStoreRegistry({f"file://{base_path}": local_store})
# registry = ObjectStoreRegistry({})  # for icechunk testing


# Parser used to read the source files.
# parser = KerchunkJSONParser()
# or
from virtualizarr.parsers import HDFParser

parser = HDFParser()
import re  # only needed for the natural-sort key below


def natural_sort_key(path):
    """Return a sort key that orders the digit runs of a file name numerically.

    The base name of ``path`` is split into alternating text and digit
    chunks; digit chunks are converted to ``int`` so that e.g. ``M2`` sorts
    before ``M10`` whether or not the numbers are zero-padded.
    """
    chunks = re.split(r"(\d+)", os.path.basename(path))
    # With a capturing group, re.split alternates text (even index) and
    # digits (odd index), so keys of different files compare type-safely.
    return [int(chunk) if i % 2 else chunk for i, chunk in enumerate(chunks)]


# Directory holding the GAMAR hourly NetCDF files.
base_path = "/scale/project/lops-oh-fair2adapt/riomar/GAMAR/"
# fpattern=base_path + 'GAMAR_1h_inst_Y2023M*.nc'
fpattern = base_path + "GAMAR_1h_inst_Y20*M*.nc"
# glob.glob() returns matches in arbitrary (file-system dependent) order.
# The files are later concatenated along the time axis in list order, so
# sort them by the year/month numbers embedded in their names.
flist = sorted(glob.glob(fpattern), key=natural_sort_key)
len(flist)
276
# Keyword arguments shared by every open_virtual_dataset() call.
# The variables named in `loadable_variables` show up with real values in
# the dataset reprs, whereas the remaining ones stay as ManifestArray
# references.
_open_kwargs = {
    "loadable_variables": [
        "time_counter",
        "time_instant",
        "x_rho",
        "y_rho",
        "x_u",
        "x_v",
        "y_u",
        "y_v",
        "axis_nbounds",
    ],
    "parser": parser,
    "registry": registry,
    "decode_times": True,
}
dask_open_virtual_dataset = partial(open_virtual_dataset, **_open_kwargs)

# One task per file on the Dask cluster; gather() returns the datasets in
# the same order as `flist`.
futures = client.map(dask_open_virtual_dataset, flist)
dss = client.gather(futures)

# Preview the first two and the last two virtual datasets.
for _preview in (dss[:2], dss[-2:]):
    display(_preview)
[<xarray.Dataset> Size: 273GB Dimensions: (axis_nbounds: 2, x_rho: 727, y_rho: 838, x_u: 726, y_u: 838, x_v: 727, y_v: 837, time_counter: 696, s_rho: 40, s_w: 41) Coordinates: (12/17) * axis_nbounds (axis_nbounds) float32 8B 0.0 0.0 * x_rho (x_rho) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * y_rho (y_rho) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * x_u (x_u) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * y_u (y_u) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * x_v (x_v) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 ... ... nav_lat_rho (y_rho, x_rho) float32 2MB ManifestArray<shape=(838,... nav_lon_rho (y_rho, x_rho) float32 2MB ManifestArray<shape=(838,... nav_lat_u (y_u, x_u) float32 2MB ManifestArray<shape=(838, 726... nav_lon_u (y_u, x_u) float32 2MB ManifestArray<shape=(838, 726... nav_lat_v (y_v, x_v) float32 2MB ManifestArray<shape=(837, 727... nav_lon_v (y_v, x_v) float32 2MB ManifestArray<shape=(837, 727... Data variables: (12/16) time_instant_bounds (time_counter, axis_nbounds) float64 11kB ManifestAr... time_counter_bounds (time_counter, axis_nbounds) float64 11kB ManifestAr... hc float32 4B ManifestArray<shape=(), dtype=float32, ch... theta_s float32 4B ManifestArray<shape=(), dtype=float32, ch... theta_b float32 4B ManifestArray<shape=(), dtype=float32, ch... Tcline float32 4B ManifestArray<shape=(), dtype=float32, ch... ... ... sc_w (s_w) float32 164B ManifestArray<shape=(41,), dtype=... zeta (time_counter, y_rho, x_rho) float32 2GB ManifestArr... u (time_counter, s_rho, y_u, x_u) float32 68GB Manifes... v (time_counter, s_rho, y_v, x_v) float32 68GB Manifes... temp (time_counter, s_rho, y_rho, x_rho) float32 68GB Man... salt (time_counter, s_rho, y_rho, x_rho) float32 68GB Man... Attributes: (12/45) name: GAMAR_GLORYS_1h_inst description: Created by xios Conventions: CF-1.6 timeStamp: 2024-Jun-06 05:18:39 GMT uuid: b8fc912c-9215-49c8-90f0-439cfada6035 title: GAMAR_GLORYS ... ... 
gamma2_expl: Slipperiness parameter x_sponge: 0.0 v_sponge: 0.0 sponge_expl: Sponge parameters : extent (m) & viscosity (m2.s-1) SRCS: main.F step.F read_inp.F timers_roms.F init_scalars.F ini... CPP-options: REGIONAL GAMAR MPI TIDES OBC_WEST OBC_NORTH XIOS USE_CALE..., <xarray.Dataset> Size: 292GB Dimensions: (axis_nbounds: 2, x_rho: 727, y_rho: 838, x_u: 726, y_u: 838, x_v: 727, y_v: 837, time_counter: 744, s_rho: 40, s_w: 41) Coordinates: (12/17) * axis_nbounds (axis_nbounds) float32 8B 0.0 0.0 * x_rho (x_rho) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * y_rho (y_rho) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * x_u (x_u) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * y_u (y_u) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * x_v (x_v) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 ... ... nav_lat_rho (y_rho, x_rho) float32 2MB ManifestArray<shape=(838,... nav_lon_rho (y_rho, x_rho) float32 2MB ManifestArray<shape=(838,... nav_lat_u (y_u, x_u) float32 2MB ManifestArray<shape=(838, 726... nav_lon_u (y_u, x_u) float32 2MB ManifestArray<shape=(838, 726... nav_lat_v (y_v, x_v) float32 2MB ManifestArray<shape=(837, 727... nav_lon_v (y_v, x_v) float32 2MB ManifestArray<shape=(837, 727... Data variables: (12/16) time_instant_bounds (time_counter, axis_nbounds) float64 12kB ManifestAr... time_counter_bounds (time_counter, axis_nbounds) float64 12kB ManifestAr... hc float32 4B ManifestArray<shape=(), dtype=float32, ch... theta_s float32 4B ManifestArray<shape=(), dtype=float32, ch... theta_b float32 4B ManifestArray<shape=(), dtype=float32, ch... Tcline float32 4B ManifestArray<shape=(), dtype=float32, ch... ... ... sc_w (s_w) float32 164B ManifestArray<shape=(41,), dtype=... zeta (time_counter, y_rho, x_rho) float32 2GB ManifestArr... u (time_counter, s_rho, y_u, x_u) float32 72GB Manifes... v (time_counter, s_rho, y_v, x_v) float32 72GB Manifes... temp (time_counter, s_rho, y_rho, x_rho) float32 73GB Man... 
salt (time_counter, s_rho, y_rho, x_rho) float32 73GB Man... Attributes: (12/45) name: GAMAR_GLORYS_1h_inst description: Created by xios Conventions: CF-1.6 timeStamp: 2024-Jun-02 07:16:43 GMT uuid: 275a24ef-c903-4fdd-9da8-d8c6a1adbddf title: GAMAR_GLORYS ... ... gamma2_expl: Slipperiness parameter x_sponge: 0.0 v_sponge: 0.0 sponge_expl: Sponge parameters : extent (m) & viscosity (m2.s-1) SRCS: main.F step.F read_inp.F timers_roms.F init_scalars.F ini... CPP-options: REGIONAL GAMAR MPI TIDES OBC_WEST OBC_NORTH XIOS USE_CALE...]
[<xarray.Dataset> Size: 263GB Dimensions: (axis_nbounds: 2, x_rho: 727, y_rho: 838, x_u: 726, y_u: 838, x_v: 727, y_v: 837, time_counter: 672, s_rho: 40, s_w: 41) Coordinates: (12/17) * axis_nbounds (axis_nbounds) float32 8B 0.0 0.0 * x_rho (x_rho) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * y_rho (y_rho) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * x_u (x_u) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * y_u (y_u) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * x_v (x_v) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 ... ... nav_lat_rho (y_rho, x_rho) float32 2MB ManifestArray<shape=(838,... nav_lon_rho (y_rho, x_rho) float32 2MB ManifestArray<shape=(838,... nav_lat_u (y_u, x_u) float32 2MB ManifestArray<shape=(838, 726... nav_lon_u (y_u, x_u) float32 2MB ManifestArray<shape=(838, 726... nav_lat_v (y_v, x_v) float32 2MB ManifestArray<shape=(837, 727... nav_lon_v (y_v, x_v) float32 2MB ManifestArray<shape=(837, 727... Data variables: (12/16) time_instant_bounds (time_counter, axis_nbounds) float64 11kB ManifestAr... time_counter_bounds (time_counter, axis_nbounds) float64 11kB ManifestAr... hc float32 4B ManifestArray<shape=(), dtype=float32, ch... theta_s float32 4B ManifestArray<shape=(), dtype=float32, ch... theta_b float32 4B ManifestArray<shape=(), dtype=float32, ch... Tcline float32 4B ManifestArray<shape=(), dtype=float32, ch... ... ... sc_w (s_w) float32 164B ManifestArray<shape=(41,), dtype=... zeta (time_counter, y_rho, x_rho) float32 2GB ManifestArr... u (time_counter, s_rho, y_u, x_u) float32 65GB Manifes... v (time_counter, s_rho, y_v, x_v) float32 65GB Manifes... temp (time_counter, s_rho, y_rho, x_rho) float32 66GB Man... salt (time_counter, s_rho, y_rho, x_rho) float32 66GB Man... Attributes: (12/45) name: GAMAR_GLORYS_1h_inst description: Created by xios Conventions: CF-1.6 timeStamp: 2024-Mar-26 02:45:45 GMT uuid: 824a37c0-ec00-4c46-bda3-bd2564ef445c title: GAMAR_GLORYS ... ... 
gamma2_expl: Slipperiness parameter x_sponge: 0.0 v_sponge: 0.0 sponge_expl: Sponge parameters : extent (m) & viscosity (m2.s-1) SRCS: main.F step.F read_inp.F timers_roms.F init_scalars.F ini... CPP-options: REGIONAL GAMAR MPI TIDES OBC_WEST OBC_NORTH XIOS USE_CALE..., <xarray.Dataset> Size: 292GB Dimensions: (axis_nbounds: 2, x_rho: 727, y_rho: 838, x_u: 726, y_u: 838, x_v: 727, y_v: 837, time_counter: 744, s_rho: 40, s_w: 41) Coordinates: (12/17) * axis_nbounds (axis_nbounds) float32 8B 0.0 0.0 * x_rho (x_rho) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * y_rho (y_rho) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * x_u (x_u) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * y_u (y_u) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 * x_v (x_v) float32 3kB 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 ... ... nav_lat_rho (y_rho, x_rho) float32 2MB ManifestArray<shape=(838,... nav_lon_rho (y_rho, x_rho) float32 2MB ManifestArray<shape=(838,... nav_lat_u (y_u, x_u) float32 2MB ManifestArray<shape=(838, 726... nav_lon_u (y_u, x_u) float32 2MB ManifestArray<shape=(838, 726... nav_lat_v (y_v, x_v) float32 2MB ManifestArray<shape=(837, 727... nav_lon_v (y_v, x_v) float32 2MB ManifestArray<shape=(837, 727... Data variables: (12/16) time_instant_bounds (time_counter, axis_nbounds) float64 12kB ManifestAr... time_counter_bounds (time_counter, axis_nbounds) float64 12kB ManifestAr... hc float32 4B ManifestArray<shape=(), dtype=float32, ch... theta_s float32 4B ManifestArray<shape=(), dtype=float32, ch... theta_b float32 4B ManifestArray<shape=(), dtype=float32, ch... Tcline float32 4B ManifestArray<shape=(), dtype=float32, ch... ... ... sc_w (s_w) float32 164B ManifestArray<shape=(41,), dtype=... zeta (time_counter, y_rho, x_rho) float32 2GB ManifestArr... u (time_counter, s_rho, y_u, x_u) float32 72GB Manifes... v (time_counter, s_rho, y_v, x_v) float32 72GB Manifes... temp (time_counter, s_rho, y_rho, x_rho) float32 73GB Man... 
salt (time_counter, s_rho, y_rho, x_rho) float32 73GB Man... Attributes: (12/45) name: GAMAR_GLORYS_1h_inst description: Created by xios Conventions: CF-1.6 timeStamp: 2024-May-30 13:16:58 GMT uuid: 2a4838fb-b964-426a-b3f6-4a94e9d820f9 title: GAMAR_GLORYS ... ... gamma2_expl: Slipperiness parameter x_sponge: 0.0 v_sponge: 0.0 sponge_expl: Sponge parameters : extent (m) & viscosity (m2.s-1) SRCS: main.F step.F read_inp.F timers_roms.F init_scalars.F ini... CPP-options: REGIONAL GAMAR MPI TIDES OBC_WEST OBC_NORTH XIOS USE_CALE...]
%%skip version non dask
# Non-Dask variant: open every file of `flist` sequentially as a virtual
# dataset. The commented-out keyword arguments are options that were tried
# or considered and are kept for reference.
dss = [
    open_virtual_dataset(f,
        #concat_dim='time_counter',     # concatenation dimension: time_counter
        #loadable_variables=['temp', 'nav_lat_rho', 'nav_lon_rho'],  # only the useful ones
        loadable_variables=["time_counter","time_instant","x_rho","y_rho","x_u","x_v","y_u","y_v","axis_nbounds"],
        parser=parser,
        #combine='nested',           # nested is slow to compute (hierarchical), by_coords is fast

        registry=registry,

        #coords="minimal",                # keep only the coords that contain time_counter
        #data_vars="minimal",             # keep only the variables that contain time_counter
        #compat="override",               # ignore differences between files (take the first one)
        #combine_attrs="drop_conflicts",  # drop conflicting attributes
        decode_times=True,
    )
    for f in flist # apparently ~5-10 seconds per file
]
dss
# Concatenate the per-file virtual datasets along the time axis and promote
# the two time-bounds variables to coordinates.
ds = xr.concat(
    dss,
    dim="time_counter",
    # Pin the current xarray default explicitly: xarray announces (through a
    # FutureWarning) that the default will change from "all" to None, which
    # could silently alter the combined dataset. "all" reproduces the
    # behaviour this notebook has been run with.
    # NOTE(review): data_vars="minimal" may be what is actually wanted here
    # (only concatenate variables that already have time_counter) - confirm.
    data_vars="all",
    # Do not compare non-concatenated variables across files; take them
    # from the first dataset.
    compat="override",
    # Only concatenate coordinates in which time_counter already appears.
    coords="minimal",
    # Keep attributes shared by all files, drop the ones that conflict.
    combine_attrs="drop_conflicts",
).set_coords(["time_counter_bounds", "time_instant_bounds"])
ds
/tmp/ipykernel_1578578/212694250.py:2: FutureWarning: In a future version of xarray the default value for data_vars will change from data_vars='all' to data_vars=None. This is likely to lead to different results when multiple datasets have matching variables with overlapping values. To opt in to new defaults and get rid of these warnings now use `set_options(use_new_combine_kwarg_defaults=True) or set data_vars explicitly.
  xr.concat(
Loading...
# Output directory and path of the icechunk archive to create.
outpath = "/scale/project/lops-oh-fair2adapt/fpaul/tmp/"
zarr_archive = os.path.join(outpath, "riomar.zarr")
import icechunk
# Local-filesystem storage handle pointing at the archive path.
# NOTE: icechunk warns that this backend is not safe for concurrent commits.
storage = icechunk.local_filesystem_storage(zarr_archive)
storage
  2025-12-11T13:52:22.574050Z  WARN icechunk::storage::object_store: The LocalFileSystem storage is not safe for concurrent commits. If more than one thread/process will attempt to commit at the same time, prefer using object stores.
    at icechunk/src/storage/object_store.rs:80

ObjectStorage(backend=LocalFileSystemObjectStoreBackend(path=/scale/project/lops-oh-fair2adapt/fpaul/tmp/riomar.zarr))
## Code that should work, adapted from the example provided at:
# https://icechunk.io/en/stable/virtual/#creating-a-virtual-dataset-with-virtualizarr
config = icechunk.RepositoryConfig.default()
# config.set_virtual_chunk_container(
#     icechunk.VirtualChunkContainer(
#         "s3://mybucket/my/data/",
#         icechunk.s3_store(region="us-east-1"),
#     )
# )

## Example from
# https://icechunk.io/en/stable/virtual/#example_1
# (see "Local Filesystem"; the example mentions s3 inside,
# but that is probably not actually used)
# config.set_virtual_chunk_container(
#     icechunk.VirtualChunkContainer(
#         "s3://mybucket/my/data",
#         icechunk.local_filesystem_store(
#             "/path/to/my"
#         ),
#     )
# )

# URL prefix of the virtual chunks (the original NetCDF files). Defined once
# so that the container declaration and its credentials entry cannot drift
# apart.
virtual_chunk_prefix = "file:///scale/project/lops-oh-fair2adapt/riomar/GAMAR/"

# NOTE(review): the store below is rooted at ".../riomar" whereas the prefix
# points at ".../riomar/GAMAR/" - confirm this is what icechunk expects when
# the virtual chunks are read back.
config.set_virtual_chunk_container(
    icechunk.VirtualChunkContainer(
        virtual_chunk_prefix,
        icechunk.local_filesystem_store("/scale/project/lops-oh-fair2adapt/riomar"),
    )
)
# Authorise access to that container; None is passed as its credential.
credentials = icechunk.containers_credentials({virtual_chunk_prefix: None})

# Repository.create() needs a location that does not exist yet. Raise
# explicitly instead of using `assert`, which is stripped under `python -O`.
if os.path.exists(zarr_archive):
    raise FileExistsError(f"SHOULD NOT EXIST ! {zarr_archive}")

# Creates the directory at the storage location, which must not already
# exist. Here: /scale/project/lops-oh-fair2adapt/fpaul/tmp/riomar.zarr
repo = icechunk.Repository.create(
    storage,
    config,
    authorize_virtual_chunk_access=credentials,
)
session = repo.writable_session("main")
# Writes the virtual dataset into the repository created above.
ds.virtualize.to_icechunk(session.store)
# Last expression of the cell: commit() returns the value shown as output.
session.commit("My first virtual store!")
'MWQF9SWYFPKN4GJH8AT0'

Quick check icechunk .zarr archive

import os

# Set SHELL before running the `!` commands below.
# NOTE(review): presumably so that they run under bash - confirm why this
# was needed.
os.environ["SHELL"] = "/bin/bash"
# %env SHELL=/bin/bash
# Disk usage of the archive (summary, human-readable).
!du -hs {zarr_archive}

# Top-level content of the archive directory.
!ls -alh {zarr_archive}
16K	/scale/project/lops-oh-fair2adapt/fpaul/tmp/riomar.zarr
total 4.0K
drwxr-sr-x 7 fpaul fair2adapt 4.0K Dec 11 14:55 .
drwxr-sr-x 3 fpaul fair2adapt 4.0K Dec 11 14:54 ..
drwxr-sr-x 2 fpaul fair2adapt 4.0K Dec 11 14:53 chunks
-rw-r--r-- 1 fpaul fair2adapt  404 Dec 11 14:53 config.yaml
drwxr-sr-x 2 fpaul fair2adapt 4.0K Dec 11 14:55 manifests
drwxr-sr-x 3 fpaul fair2adapt 4.0K Dec 11 14:53 refs
drwxr-sr-x 2 fpaul fair2adapt 4.0K Dec 11 14:55 snapshots
drwxr-sr-x 2 fpaul fair2adapt 4.0K Dec 11 14:55 transactions

USELESS, A VIRER

%%bash
# Run the helper script from the tmp directory, then list the JSON files
# found there. What make_sed_web.sh does is not visible from this notebook.
cd /scale/project/lops-oh-fair2adapt/fpaul/tmp
./make_sed_web.sh
ls -alh *.json
import json
from itertools import islice

# Preview the beginning of the JSON file referenced by `outfile`.
fichier = outfile

# json.load() parses the whole document, even when it is stored on one line.
with open(fichier, encoding="utf-8") as f:
    data = json.load(f)

# Re-serialise with indentation so the content can be read line by line.
texte = json.dumps(data, ensure_ascii=False, indent=2)

# Print only the first 20 lines.
for ligne in texte.splitlines()[:20]:
    print(ligne)

FIN