In [37]:
import copernicusmarine as cm
import pandas as pd
import xarray as xr
import hvplot.xarray
import hvplot.pandas
import numpy as np
import holoviews as hv
In [ ]:
cm.login()
In [24]:
ioc_cleanup = pd.read_csv('ioc_cleanup_2023.csv', index_col=0)
ioc_cleanup
Out[24]:
Station_Name ioc_code gloss_id latitude longitude Country connection contacts dcp_id last_observation_level ... number_of_years time_zone_hours datum_information instrument precision null_value gauge_type overall_record_quality gesla3_id seaset_id
125 Base O'Higgins ohig NaN -63.321 -57.901 Antarctica SXCH40 Servicio Hidrográfico y Oceanográfico de la Ar... ADC04BE6 1.75 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 125
135 Puerto Deseado dese 190.0 -47.754 -65.915 Argentina SEPO40 Armada Argentina Servicio de Hidrografía Naval... 33912088 3.69 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 135
136 Puerto Madryn madry 191.0 -42.763 -65.031 Argentina SEPO40 Armada Argentina Servicio de Hidrografía Naval... 335665D2 6.60 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 136
139 Battery Point bapj NaN -42.892 147.338 Australia SZAU01 National Tidal Centre/Australian Bureau of Met... 61221 0.91 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 139
140 Broome brom 40.0 -18.001 122.219 Australia SZAU01 National Tidal Centre/Australian Bureau of Met... 62650 7.69 ... 32.0 0.0 Unspecified Unspecified Unspecified -99.9999 Coastal No obvious issues Broome 140
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5451 Woods Hole, MA wood NaN 41.523 -70.672 USA web National Ocean Service-NOAA ( USA ) 3340A79C 1.08 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 5451
5462 Yakutat yaku NaN 59.548 -139.735 USA web National Ocean Service-NOAA ( USA ) 336B0522 2.04 ... 61.0 0.0 Station Datum Unspecified Unspecified -99.9999 Coastal No obvious issues Yakutat 5462
6129 Colombo colo 33.0 6.941 79.850 Sri Lanka SWIO40 National Aquatic Resources Research and Develo... 0650018E 1.39 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 6129
6141 DART New Zealand E Coast dnze NaN -36.049 -177.708 New Zealand web GNS Science ( New Zealand ) 5401001 5776.60 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 6141
6161 Prince Rupert prin2 155.0 54.317 -130.317 Canada web Fisheries and Oceans Canada ( Canada ) NaN 6.28 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 6161

169 rows × 77 columns

In [27]:
# subset
ioc_re = ioc_cleanup[ioc_cleanup.ioc_code == 'boma']
ioc_re
Out[27]:
Station_Name ioc_code gloss_id latitude longitude Country connection contacts dcp_id last_observation_level ... number_of_years time_zone_hours datum_information instrument precision null_value gauge_type overall_record_quality gesla3_id seaset_id
3547 Boston boma NaN 42.355 -71.052 USA web National Ocean Service-NOAA ( USA ) 335E54EE 1.68 ... 101.0 0.0 Station Datum Unspecified Unspecified -99.9999 Coastal No obvious issues Boston 3547

1 rows × 77 columns

In [4]:
import os
os.makedirs('data/nc/', exist_ok=True)
os.makedirs('data/parquet/', exist_ok=True)
In [ ]:
for it, item in ioc_cleanup.iterrows():
    lonmax = np.ceil(item.longitude) + 1
    lonmin = np.floor(item.longitude)
    latmax = np.ceil(item.latitude)
    latmin = np.floor(item.latitude)
    cm.subset(
        dataset_id="cmems_mod_glo_phy_anfc_0.083deg_PT1H-m",
        variables=["zos"],
        minimum_longitude=lonmin,
        maximum_longitude=lonmax,
        minimum_latitude=latmin,
        maximum_latitude=latmax,
        start_datetime="2022-01-01T00:00:00",
        end_datetime="2023-12-31T23:00:00",
        output_filename = f"data/nc/{item.ioc_code}.nc", 
        force_download=True
    )
In [30]:
def get_closest_coordinates(x, y, ds):
    xx, yy = np.meshgrid(ds.longitude.values, ds.latitude.values)
    xall, yall = xx.ravel(), yy.ravel()
    data = ds.zos[-1, 0, :, :].values.ravel()
    non_nan_mask = ~np.isnan(data)
    xall_non_nan = xall[non_nan_mask]
    yall_non_nan = yall[non_nan_mask]
    distances_squared = (xall_non_nan - x)**2 + (yall_non_nan - y)**2
    # print(distances_squared)
    closest_idx = np.argmin(distances_squared)
    lo_, la_ = xall_non_nan[closest_idx], yall_non_nan[closest_idx]
    ilon = np.argmin(abs(ds.longitude.values - lo_))
    ilat = np.argmin(abs(ds.latitude.values - la_))
    return ilon, ilat

def extract_parquet(stations: pd.DataFrame):
    for it, item in stations.iterrows():
        print(item.ioc_code)
        ds = xr.open_dataset(f'data/nc/{item.ioc_code}.nc')
        ilon, ilat = get_closest_coordinates(item.longitude, item.latitude, ds)
        df = pd.DataFrame({'zos':ds.zos[:,0,ilat, ilon].values}, ds.time.values)
        df.to_parquet(f'data/parquet/{item.ioc_code}.parquet')
In [31]:
extract_parquet(ioc_cleanup)
ohig
dese
madry
bapj
brom
barn
djve
darw
pkem
pmur
ross
sprg
thev
trst
oste
bele
bamf
prin
stjo
vhbc
greg
ohig3
cald
coqu
corr
pich
ptal
pcha
pwil
qtro
quir
talc
viti
cher
dzao
herb
mare
nuku
stqy
stqy2
tubua
cuxh
helg
horn
itea
LA23
abur
fuka
hmda
hana
ishig
kusm
kush
naga
naha
omae
sado
saig
tosa
waka
kant
huat
chst
ande
honn
malo
rorv
treg
vard
davo
dapi
hie2
fue2
coru
arko
fors
furu
gokr
holm
kalit
karl
klag
kung
kungr
land
olan
oska
oxel
rata
simp
simr
smog
spik
visb
zanz
nkfa
amas
anta
igne
sile
abed
bang
crom
dove
harw
heys
holy
holy2
ilfa
kinl
leit
lerw
lerw2
live
lowe
mhav
mill
mumb
nhav
newl
newl2
npor
nshi
plym
porp
prus
ptmt
shee
stor
whit
wick
work
alam
aren
asto
atka
acnj
bamd
bame
benc
boma
bgct
amal
chrp
cwfl
cres
dkwa
datu
dpnc
dutc
elak
fpnt
guam
hilo
kahu
kawa
kwfl
lajo
lime
pagb
pslu
sdpt
sitk
wpwa
wood
yaku
colo
dnze
prin2
In [46]:
# check if we have data
for it, item in ioc_cleanup.iterrows():
    print(it, item.ioc_code)
    df = pd.read_parquet(f'./data/parquet/{item.ioc_code}.parquet')
    df.hvplot()
    if it > 200: 
        break
125 ohig
Out[46]:
135 dese
Out[46]:
136 madry
Out[46]:
139 bapj
Out[46]:
140 brom
Out[46]:
141 barn
Out[46]:
149 djve
Out[46]:
152 darw
Out[46]:
161 pkem
Out[46]:
164 pmur
Out[46]:
165 ross
Out[46]:
167 sprg
Out[46]:
168 thev
Out[46]:
169 trst
Out[46]:
221 oste
Out[46]: