Protocol for skill assessment¶
The period of the skill assessment covers most of 2023: from 2023-01-01 to 2023-11-30.
This is the most energetic part of the year for cyclones, as shown in the next figure:
the beginning of the year was marked by cyclones in the South Indian Ocean,
whereas during the fall, cyclones were more frequent in the Pacific and North Atlantic oceans.
! mkdir -p data
! wget https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r00/access/netcdf/IBTrACS.ALL.v04r00.nc -O data/IBTrACS.ALL.v04r00.nc
--2024-04-30 12:44:17--  https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r00/access/netcdf/IBTrACS.ALL.v04r00.nc
Resolving www.ncei.noaa.gov (www.ncei.noaa.gov)... 205.167.25.167, 205.167.25.168, 205.167.25.172, ...
Connecting to www.ncei.noaa.gov (www.ncei.noaa.gov)|205.167.25.167|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26815169 (26M) [application/x-netcdf]
Saving to: ‘data/IBTrACS.ALL.v04r00.nc’

data/IBTrACS.ALL.v0 100%[===================>]  25,57M  2,19MB/s    in 9,2s

2024-04-30 12:44:27 (2,79 MB/s) - ‘data/IBTrACS.ALL.v04r00.nc’ saved [26815169/26815169]
# load IBTrACS data
import xarray as xr
import numpy as np
import pandas as pd
import tqdm
import glob
import os
import json
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from analysea.tide import detide
import geopandas as gpd
from scipy.spatial import cKDTree
cyclones = xr.open_dataset('data/IBTrACS.ALL.v04r00.nc')
cyclones
<xarray.Dataset> Size: 4GB
Dimensions:           (storm: 13806, date_time: 360, quadrant: 4)
Coordinates:
    time              (storm, date_time) datetime64[ns] 40MB ...
    lat               (storm, date_time) float32 20MB ...
    lon               (storm, date_time) float32 20MB ...
Dimensions without coordinates: storm, date_time, quadrant
Data variables: (12/147)
    numobs            (storm) float32 55kB ...
    sid               (storm) |S13 179kB ...
    season            (storm) float32 55kB ...
    number            (storm) int16 28kB ...
    basin             (storm, date_time) |S2 10MB ...
    subbasin          (storm, date_time) |S2 10MB ...
    ...                ...
    reunion_gust      (storm, date_time) float32 20MB ...
    reunion_gust_per  (storm, date_time) float32 20MB ...
    usa_seahgt        (storm, date_time) float32 20MB ...
    usa_searad        (storm, date_time, quadrant) float32 80MB ...
    storm_speed       (storm, date_time) float32 20MB ...
    storm_dir         (storm, date_time) float32 20MB ...
Attributes: (12/49)
    title:             IBTrACS - International Best Track Archive fo...
    summary:           The intent of the IBTrACS project is to overc...
    source:            The original data are tropical cyclone positi...
    Conventions:       ACDD-1.3
    Conventions_note:  Data are nearly CF-1.7 compliant. The sole is...
    product_version:   v04r00
    ...                ...
    history:           Sun Apr 28 04:18:28 2024: ncks --no_abc --cnk...
    license:           These data may be redistributed and used with...
    featureType:       trajectory
    cdm_data_type:     Trajectory
    comment:           The tracks of TCs generally look like a traje...
    NCO:               netCDF Operators version 4.8.1 (Homepage = ht...
# select only the 2023 assessment period
indice_storm_2023 = []
def is_in_period_2023(times):
    # keep storms with at least one record between 2023-01-01 and 2023-11-30
    test1 = [pd.Timestamp(t.decode()) < pd.Timestamp('2023-11-30') for t in times]
    test2 = [pd.Timestamp(t.decode()) > pd.Timestamp('2023-01-01') for t in times]
    test = np.logical_and(test1, test2)
    return np.any(test)
for i_storm in tqdm.tqdm(cyclones.storm.values[13300:]): # no need to loop over the 19th/20th centuries!
    times = cyclones.isel(storm=i_storm).iso_time.values
    if is_in_period_2023(times):
        indice_storm_2023.append(i_storm)
# takes ~20 s to run
100%|██████████| 506/506 [00:16<00:00, 30.12it/s]
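Note that IBTrACS also carries a per-storm season variable, so the loop above could be replaced with a vectorized selection. A minimal sketch, assuming season marks the cyclone season year (which, for the southern hemisphere, spans December to November — e.g. ELLIE belongs to season 2023 but starts in December 2022):
# vectorized alternative to the loop above (assumption: season == 2023)
indice_storm_2023_alt = np.where(cyclones.season.values == 2023)[0].tolist()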
def extract_specs(istorm):
    # extract track coordinates, times, winds and radii for one storm
    event = cyclones.isel(storm=int(istorm))
    x, y, time = np.array(event.lon), np.array(event.lat), np.array(event.time)
    mask = ~np.isnan(x)  # drop the padded (NaN) tail of the track
    x = x[mask]
    y = y[mask]
    time = time[mask]
    wind_max = np.array(event.usa_wind)[mask]  # max sustained wind, in knots
    # radius of maximum winds (usa_rmw), converted to degrees of longitude
    r64 = np.array(event.usa_rmw)[mask]/(111 * np.cos(np.deg2rad(y)))
    name = np.array(event.name)
    basin = np.array(event.basin)[mask]
    return x, y, time, wind_max, r64, name, basin
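Side note: usa_rmw is reported in nautical miles in IBTrACS, so the conversion above is only approximate. If exact radii matter, the 1.852 km/nmile factor can be included; nm_to_deg is a hypothetical helper, not used below:
# convert a radius from nautical miles to degrees of longitude at latitude lat_deg
# (1 nmile = 1.852 km, and one degree spans ~111 km at the equator)
def nm_to_deg(r_nm, lat_deg):
    return r_nm * 1.852 / (111.0 * np.cos(np.deg2rad(lat_deg)))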
def plot_cyclone_track(x, y, wind_max, r64, name, ax=None):
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    im = ax.scatter(x, y, c=wind_max, label=name, s=1, cmap='jet', vmin=0, vmax=100)
    # draw one hatched circle per track point, sized by the wind radius
    for ix in range(len(x)):
        circ = plt.Circle((x[ix], y[ix]),
                          radius=r64[ix],
                          fill=False,
                          hatch='////',
                          color=cm.jet(np.min([1, wind_max[ix]/100])),
                          alpha=0.3)
        ax.add_patch(circ)
    # ax.axis('equal')
    return ax, im
# check 2023 cyclone tracks
fig, ax = plt.subplots(1,1, figsize=(16,8))
for istorm in indice_storm_2023:
x, y, time, wind_max, r64, name, basin = extract_specs(istorm)
ax, im = plot_cyclone_track(x, y, wind_max, r64, name, ax)
    if max(wind_max) > 100:  # report only the strongest storms (> 100 kt)
        print(f"index IBTrACS #{istorm}, name: {name}, max wind {np.max(wind_max)}, start: {np.min(time)} end: {np.max(time)}")
plt.colorbar(im, ax = ax, orientation = 'horizontal', fraction = 0.05, aspect = 60)
plt.tight_layout()
index IBTrACS #13704, name: b'FREDDY', max wind 138.0, start: 2023-02-06T06:00:00.000039936 end: 2023-02-24T06:00:00.000039936
index IBTrACS #13708, name: b'JUDY', max wind 109.0, start: 2023-02-27T00:00:00.000039936 end: 2023-03-04T18:00:00.000039936
index IBTrACS #13709, name: b'KEVIN', max wind 134.0, start: 2023-03-01T18:00:00.000039936 end: 2023-03-07T06:00:00.000039936
index IBTrACS #13711, name: b'HERMAN', max wind 128.0, start: 2023-03-29T12:00:00.000039936 end: 2023-04-02T12:00:00.000039936
index IBTrACS #13712, name: b'ILSA', max wind 128.0, start: 2023-04-08T18:00:00.000039936 end: 2023-04-13T18:00:00.000039936
index IBTrACS #13714, name: b'MOCHA', max wind 138.0, start: 2023-05-11T00:00:00.000039936 end: 2023-05-14T18:00:00.000039936
index IBTrACS #13716, name: b'MAWAR', max wind 159.0, start: 2023-05-20T00:00:00.000039936 end: 2023-06-03T06:00:00.000039936
index IBTrACS #13718, name: b'BIPARJOY', max wind 105.0, start: 2023-06-06T06:00:00.000039936 end: 2023-06-16T12:00:00.000039936
index IBTrACS #13725, name: b'CALVIN', max wind 110.0, start: 2023-07-09T00:00:00.000039936 end: 2023-07-19T18:00:00.000039936
index IBTrACS #13729, name: b'DOKSURI', max wind 128.0, start: 2023-07-21T12:00:00.000039936 end: 2023-07-28T06:00:00.000039936
index IBTrACS #13730, name: b'KHANUN', max wind nan, start: 2023-07-27T06:00:00.000039936 end: 2023-08-10T18:00:00.000039936
index IBTrACS #13731, name: b'DORA', max wind 125.0, start: 2023-07-28T18:00:00.000039936 end: 2023-08-16T00:00:00.000039936
index IBTrACS #13734, name: b'LAN', max wind 115.0, start: 2023-08-07T18:00:00.000039936 end: 2023-08-17T12:00:00.000039936
index IBTrACS #13735, name: b'FERNANDA', max wind 115.0, start: 2023-08-10T00:00:00.000039936 end: 2023-08-17T12:00:00.000039936
index IBTrACS #13737, name: b'HILARY', max wind 125.0, start: 2023-08-13T18:00:00.000039936 end: 2023-08-21T18:00:00.000039936
index IBTrACS #13740, name: b'FRANKLIN', max wind 130.0, start: 2023-08-19T18:00:00.000039936 end: 2023-09-09T18:00:00.000039936
index IBTrACS #13744, name: b'SAOLA', max wind 134.0, start: 2023-08-23T18:00:00.000039936 end: 2023-09-03T12:00:00.000039936
index IBTrACS #13745, name: b'IDALIA', max wind 115.0, start: 2023-08-26T12:00:00.000039936 end: 2023-09-08T06:00:00.000039936
index IBTrACS #13746, name: b'HAIKUI', max wind 105.0, start: 2023-08-28T12:00:00.000039936 end: 2023-09-04T18:00:00.000039936
index IBTrACS #13750, name: b'JOVA', max wind 140.0, start: 2023-09-02T00:00:00.000039936 end: 2023-09-10T18:00:00.000039936
index IBTrACS #13751, name: b'LEE', max wind 145.0, start: 2023-09-05T12:00:00.000039936 end: 2023-09-18T18:00:00.000039936
index IBTrACS #13762, name: b'KOINU', max wind 119.0, start: 2023-09-29T12:00:00.000039936 end: 2023-10-10T00:00:00.000039936
index IBTrACS #13763, name: b'LIDIA', max wind 120.0, start: 2023-09-30T18:00:00.000039936 end: 2023-10-11T06:00:00.000039936
index IBTrACS #13765, name: b'BOLAVEN', max wind 154.0, start: 2023-10-07T00:00:00.000039936 end: 2023-10-14T12:00:00.000039936
index IBTrACS #13767, name: b'NORMA', max wind 115.0, start: 2023-10-15T00:00:00.000039936 end: 2023-10-23T12:00:00.000039936
index IBTrACS #13768, name: b'OTIS', max wind 145.0, start: 2023-10-17T18:00:00.000039936 end: 2023-10-25T18:00:00.000039936
index IBTrACS #13771, name: b'TEJ', max wind 109.0, start: 2023-10-20T12:00:00.000039936 end: 2023-10-24T06:00:00.000039936
index IBTrACS #13772, name: b'LOLA', max wind 119.0, start: 2023-10-21T18:00:00.000039936 end: 2023-10-26T00:00:00.000039936
2 - compare the cyclone tracks with tide gauge locations¶
2.1 - get clean tide gauges¶
The clean tide gauge records come from ioc_cleanup: https://github.com/seareport/ioc_cleanup
# get clean tide gauges
def files_in_folder(folder, ext='.csv'):
    # return the file names (without extension) found in `folder`
    list_files = []
    for item in glob.glob(folder + '*'):
        if item.endswith(ext):
            name = os.path.split(item[:-len(ext)])[1]
            list_files.append(name)
    return list_files
CLEAN_FOLDER = "/home/tomsail/work/python/seareport_org/ioc_cleanup/clean/"
list_TG_2023 = files_in_folder(CLEAN_FOLDER, ext = '.parquet')
len(list_TG_2023)
182
2.2 - identify tide gauges with SEASET¶
## get SEASET stations -- latest version
seaset_full = pd.read_csv("/home/tomsail/Documents/work/python/oceanmodelling/seaset/Notebooks/catalog_full_updated.csv", index_col=0)
# some IOC codes were replaced for the same location, e.g. prin/prin2
def is_similar_station(ioc_code, station_list):
return any(station.startswith(ioc_code) for station in station_list)
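A quick illustration of the matching rule (hypothetical station lists):
# the retired code 'prin' still matches the clean record 'prin2'
assert is_similar_station('prin', ['prin2', 'wood'])
assert not is_similar_station('abcd', ['prin2', 'wood'])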
ioc_seaset = seaset_full.dropna(subset='ioc_code')
subset_2023 = ioc_seaset[ioc_seaset['ioc_code'].apply(is_similar_station, station_list=list_TG_2023)]
subset_2023
Station_Name | ioc_code | gloss_id | latitude | longitude | Country | connection | contacts | dcp_id | last_observation_level | ... | number_of_years | time_zone_hours | datum_information | instrument | precision | null_value | gauge_type | overall_record_quality | gesla3_id | seaset_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
125 | Base O'Higgins | ohig | NaN | -63.321 | -57.901 | Antarctica | SXCH40 | Servicio Hidrográfico y Oceanográfico de la Ar... | ADC04BE6 | 1.75 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 125 |
135 | Puerto Deseado | dese | 190.0 | -47.754 | -65.915 | Argentina | SEPO40 | Armada Argentina Servicio de Hidrografía Naval... | 33912088 | 3.69 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 135 |
136 | Puerto Madryn | madry | 191.0 | -42.763 | -65.031 | Argentina | SEPO40 | Armada Argentina Servicio de Hidrografía Naval... | 335665D2 | 6.60 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 136 |
139 | Battery Point | bapj | NaN | -42.892 | 147.338 | Australia | SZAU01 | National Tidal Centre/Australian Bureau of Met... | 61221 | 0.91 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 139 |
140 | Broome | brom | 40.0 | -18.001 | 122.219 | Australia | SZAU01 | National Tidal Centre/Australian Bureau of Met... | 62650 | 7.69 | ... | 32.0 | 0.0 | Unspecified | Unspecified | Unspecified | -99.9999 | Coastal | No obvious issues | Broome | 140 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5451 | Woods Hole, MA | wood | NaN | 41.523 | -70.672 | USA | web | National Ocean Service-NOAA ( USA ) | 3340A79C | 1.08 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5451 |
5462 | Yakutat | yaku | NaN | 59.548 | -139.735 | USA | web | National Ocean Service-NOAA ( USA ) | 336B0522 | 2.04 | ... | 61.0 | 0.0 | Station Datum | Unspecified | Unspecified | -99.9999 | Coastal | No obvious issues | Yakutat | 5462 |
6129 | Colombo | colo | 33.0 | 6.941 | 79.850 | Sri Lanka | SWIO40 | National Aquatic Resources Research and Develo... | 0650018E | 1.39 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6129 |
6141 | DART New Zealand E Coast | dnze | NaN | -36.049 | -177.708 | New Zealand | web | GNS Science ( New Zealand ) | 5401001 | 5776.60 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6141 |
6161 | Prince Rupert | prin2 | 155.0 | 54.317 | -130.317 | Canada | web | Fisheries and Oceans Canada ( Canada ) | NaN | 6.28 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6161 |
169 rows × 77 columns
2.3 - refine to stations which are in the vicinity of the cyclone tracks¶
dist_max = 5 # in degrees
def dist(lon1, lat1, lon2, lat2):
    # crude Euclidean distance in degrees (enough for a coarse screening)
    return np.sqrt((lat2 - lat1) ** 2 + (lon2 - lon1) ** 2)
def is_close_station(station, lons, lats, dist_max=5):
    lons[lons > 180] = lons[lons > 180] - 360  # wrap longitudes to [-180, 180]
    lon, lat = station
    return np.any(dist(lon, lat, lons, lats) < dist_max)
def get_tracks(cyclone_data):
lons = []
lats = []
for storm in cyclone_data.storm.values:
lons.extend(cyclone_data.isel(storm=storm).lon.values)
lats.extend(cyclone_data.isel(storm=storm).lat.values)
return lons, lats
def subset_from_tracks(df, lons, lats, dist_max=5):
    # keep the stations of `df` lying within `dist_max` degrees of the track points
    close_stations = df[df
        .apply(
            lambda row: is_close_station(
                (row['longitude'], row['latitude']),
                np.array(lons),
                np.array(lats),
                dist_max=dist_max
            ),
            axis=1
        )]
    return close_stations
def subset_from_cyclone(df, cyclone_data, dist_max=5):
    lons, lats = get_tracks(cyclone_data)
    return subset_from_tracks(df, lons, lats, dist_max=dist_max)
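The degree-space distance above stretches with latitude; if great-circle distances are preferred, a haversine variant could be swapped into is_close_station. A sketch, not used for the results below:
# great-circle distance in km between one point and arrays of points
def haversine_km(lon1, lat1, lon2, lat2):
    lon1, lat1, lon2, lat2 = map(np.deg2rad, (lon1, lat1, lon2, lat2))
    a = np.sin((lat2 - lat1) / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    return 2 * 6371.0 * np.arcsin(np.sqrt(a))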
# superpose with 2023 cyclone tracks
fig, ax = plt.subplots(1,1, figsize=(16,8))
for istorm in indice_storm_2023:
x, y, time, wind_max, r64, name, basin = extract_specs(istorm)
ax, im = plot_cyclone_track(x, y, wind_max, r64, name, ax)
stations_impacted_all = subset_from_cyclone(subset_2023, cyclones.isel(storm=indice_storm_2023))
stations_impacted_all.plot.scatter(x='longitude', y='latitude', ax=ax, s= 100, c = 'r', marker = "*", edgecolor = 'k')
<Axes: xlabel='longitude', ylabel='latitude'>
2.4 - split the analysis into basins¶
the basins in IBTrACS are the following:
basins = {
"East Pacific": 'EP',
"North Atlantic":'NA',
"North Indian": 'NI',
"South Indian": 'SI',
"South Pacific": 'SP',
"West Pacific": 'WP',
}
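As a quick sanity check, the 2023 storms can be tallied per basin with these codes (a sketch reusing extract_specs; not part of the protocol itself):
# count how many 2023 storms touch each basin
from collections import Counter
basin_counts = Counter()
for istorm in indice_storm_2023:
    *_, basins_ = extract_specs(istorm)
    basin_counts.update(set(b.decode() for b in basins_))
print(basin_counts)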
out = dict()
for basin, zone in basins.items():
    for istorm in indice_storm_2023:
        x, y, time, wind_max, r64, name, basins_ = extract_specs(istorm)
        list_ = [b.decode() for b in basins_]
        if zone in list_:
            stations_impacted = subset_from_tracks(subset_2023, x, y)
            stations_impacted.plot.scatter(x='longitude', y='latitude', ax=ax, s=100, c='r', marker="*", edgecolor='k')
            if len(stations_impacted) > 0:
                params = {
                    'name': name.item().decode(),  # decode the byte literal
                    'start': pd.Timestamp(time[0]).strftime('%Y-%m-%d %H:%M:%S'),
                    'end': pd.Timestamp(time[-1]).strftime('%Y-%m-%d %H:%M:%S'),
                    'stations': [c_ for c_ in stations_impacted.ioc_code.values]
                }
                out[str(istorm)] = params
                print(params)
with open('stations_impacted_2023.json', 'w') as f:
    json.dump(out, f, indent=2)
{'name': 'BEATRIZ', 'start': '2023-06-26 12:00:00', 'end': '2023-07-01 12:00:00', 'stations': ['huat']}
{'name': 'CALVIN', 'start': '2023-07-09 00:00:00', 'end': '2023-07-19 18:00:00', 'stations': ['hilo', 'kahu', 'kawa']}
{'name': 'DORA', 'start': '2023-07-28 18:00:00', 'end': '2023-08-16 00:00:00', 'stations': ['huat']}
{'name': 'EUGENE', 'start': '2023-08-03 00:00:00', 'end': '2023-08-07 18:00:00', 'stations': ['huat']}
{'name': 'HILARY', 'start': '2023-08-13 18:00:00', 'end': '2023-08-21 18:00:00', 'stations': ['alam', 'fpnt', 'lajo', 'pslu']}
{'name': 'JOVA', 'start': '2023-09-02 00:00:00', 'end': '2023-09-10 18:00:00', 'stations': ['huat']}
{'name': 'MAX', 'start': '2023-10-05 00:00:00', 'end': '2023-10-10 06:00:00', 'stations': ['huat']}
{'name': 'NORMA', 'start': '2023-10-15 00:00:00', 'end': '2023-10-23 12:00:00', 'stations': ['huat']}
{'name': 'OTIS', 'start': '2023-10-17 18:00:00', 'end': '2023-10-25 18:00:00', 'stations': ['huat']}
{'name': 'PILAR', 'start': '2023-10-24 12:00:00', 'end': '2023-11-06 00:00:00', 'stations': ['huat']}
{'name': 'ARLENE', 'start': '2023-05-31 18:00:00', 'end': '2023-06-04 06:00:00', 'stations': ['cwfl', 'kwfl']}
{'name': 'BRET', 'start': '2023-06-19 06:00:00', 'end': '2023-06-24 18:00:00', 'stations': ['lime']}
{'name': 'GERT', 'start': '2023-08-19 00:00:00', 'end': '2023-09-04 12:00:00', 'stations': ['lime']}
{'name': 'FRANKLIN', 'start': '2023-08-19 18:00:00', 'end': '2023-09-09 18:00:00', 'stations': ['amal', 'lime']}
{'name': 'IDALIA', 'start': '2023-08-26 12:00:00', 'end': '2023-09-08 06:00:00', 'stations': ['benc', 'cwfl', 'dpnc', 'kwfl']}
{'name': 'LEE', 'start': '2023-09-05 12:00:00', 'end': '2023-09-18 18:00:00', 'stations': ['stjo', 'bame', 'boma', 'amal', 'wood']}
{'name': 'OPHELIA', 'start': '2023-09-21 12:00:00', 'end': '2023-09-24 18:00:00', 'stations': ['acnj', 'bamd', 'benc', 'bgct', 'dpnc']}
{'name': 'PHILIPPE', 'start': '2023-09-23 06:00:00', 'end': '2023-10-06 12:00:00', 'stations': ['amal', 'lime']}
{'name': 'TAMMY', 'start': '2023-10-18 18:00:00', 'end': '2023-10-31 18:00:00', 'stations': ['amal', 'lime']}
{'name': 'ELLIE', 'start': '2022-12-22 06:00:00', 'end': '2023-01-06 18:00:00', 'stations': ['brom', 'darw']}
{'name': 'CHENESO', 'start': '2023-01-17 18:00:00', 'end': '2023-01-30 06:00:00', 'stations': ['dzao']}
{'name': 'FREDDY', 'start': '2023-02-06 06:00:00', 'end': '2023-02-24 06:00:00', 'stations': ['djve']}
{'name': 'ILSA', 'start': '2023-04-08 18:00:00', 'end': '2023-04-13 18:00:00', 'stations': ['brom', 'djve', 'darw']}
{'name': 'HALE', 'start': '2023-01-06 18:00:00', 'end': '2023-01-08 06:00:00', 'stations': ['ross', 'mare']}
{'name': 'IRENE', 'start': '2023-01-18 06:00:00', 'end': '2023-01-19 12:00:00', 'stations': ['mare']}
{'name': 'NOT_NAMED', 'start': '2023-01-20 18:00:00', 'end': '2023-01-21 12:00:00', 'stations': ['mare']}
{'name': 'JUDY', 'start': '2023-02-27 00:00:00', 'end': '2023-03-04 18:00:00', 'stations': ['mare']}
{'name': 'KEVIN', 'start': '2023-03-01 18:00:00', 'end': '2023-03-07 06:00:00', 'stations': ['mare']}
{'name': 'LOLA', 'start': '2023-10-21 18:00:00', 'end': '2023-10-26 00:00:00', 'stations': ['mare']}
{'name': 'MAL', 'start': '2023-11-13 00:00:00', 'end': '2023-11-16 00:00:00', 'stations': ['viti']}
{'name': 'SANVU', 'start': '2023-04-19 12:00:00', 'end': '2023-04-22 06:00:00', 'stations': ['dkwa']}
{'name': 'MAWAR', 'start': '2023-05-20 00:00:00', 'end': '2023-06-03 06:00:00', 'stations': ['abur', 'ishig', 'kusm', 'naga', 'naha', 'omae', 'tosa', 'guam', 'pagb']}
{'name': 'GUCHOL', 'start': '2023-06-06 00:00:00', 'end': '2023-06-13 12:00:00', 'stations': ['kusm', 'naha', 'omae']}
{'name': 'KHANUN', 'start': '2023-07-27 06:00:00', 'end': '2023-08-10 18:00:00', 'stations': ['abur', 'hmda', 'ishig', 'naga', 'naha', 'saig', 'tosa']}
{'name': 'DORA', 'start': '2023-07-28 18:00:00', 'end': '2023-08-16 00:00:00', 'stations': ['huat']}
{'name': 'LAN', 'start': '2023-08-07 18:00:00', 'end': '2023-08-17 12:00:00', 'stations': ['abur', 'fuka', 'hmda', 'hana', 'kusm', 'kush', 'omae', 'sado', 'saig', 'tosa', 'waka']}
{'name': 'DAMREY', 'start': '2023-08-23 06:00:00', 'end': '2023-08-28 18:00:00', 'stations': ['hana', 'kush']}
{'name': 'SAOLA', 'start': '2023-08-23 18:00:00', 'end': '2023-09-03 12:00:00', 'stations': ['ishig']}
{'name': 'HAIKUI', 'start': '2023-08-28 12:00:00', 'end': '2023-09-04 18:00:00', 'stations': ['ishig', 'naha']}
{'name': 'KIROGI', 'start': '2023-08-30 00:00:00', 'end': '2023-09-04 18:00:00', 'stations': ['abur', 'hmda', 'kusm', 'naga', 'omae', 'saig', 'tosa', 'dkwa']}
{'name': 'YUN-YEUNG', 'start': '2023-09-05 18:00:00', 'end': '2023-09-08 18:00:00', 'stations': ['kusm', 'omae', 'sado', 'tosa']}
{'name': 'KOINU', 'start': '2023-09-29 12:00:00', 'end': '2023-10-10 00:00:00', 'stations': ['ishig']}
{'name': 'BOLAVEN', 'start': '2023-10-07 00:00:00', 'end': '2023-10-14 12:00:00', 'stations': ['dkwa', 'guam', 'pagb']}
3 - detide the selected stations¶
create a folder for the detided stations first
! mkdir -p data/surge
SURGE_FOLDER = "./data/surge/"
for ii,ioc_code in enumerate(subset_2023.ioc_code):
lat = subset_2023.iloc[ii].latitude
if not os.path.exists(SURGE_FOLDER + f"{ioc_code}.parquet"):
df = pd.read_parquet(CLEAN_FOLDER + f"{ioc_code}.parquet")
surge = detide(df[df.columns[0]], lat=lat, resample_detide = True)
surge.to_frame().to_parquet(SURGE_FOLDER + f"{ioc_code}.parquet")
# ~1 hour per station without resampling; ~30 s with resample_detide=True
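For reference, detiding conceptually amounts to fitting tidal constituents to the observed signal and subtracting the reconstructed tide. A minimal sketch with utide directly, assuming an hourly sea-level Series sl and a station latitude lat (this is not the actual analysea implementation):
# harmonic analysis of the observed series, then removal of the predicted tide
import utide
from matplotlib.dates import date2num
t = date2num(sl.index.to_pydatetime())
coef = utide.solve(t, sl.values, lat=lat)   # fit tidal constituents
tide = utide.reconstruct(t, coef)           # predicted tidal signal
surge = pd.Series(sl.values - tide.h, index=sl.index)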
4 - compare with model results¶
BASE = "/home/tomsail/Documents/work/python/pyPoseidon/Tutorial/models/"
v0 = "/home/tomsail/work/python/pyPoseidon/Tutorial/models/v0.0/telemac/results_2D.nc"
v0p2 = "/home/tomsail/work/python/pyPoseidon/Tutorial/models/v0.2/telemac/results_2D.nc"
# v2p0 = "/home/tomsail/work/models/results/global-v2/202307_2D_tri.zarr"
## load result file -- 2D
model_tel_v0 = xr.open_dataset(v0)
model_tel_v0p2 = xr.open_dataset(v0p2)
# model_tel_v2 = xr.open_dataset(v2p0)
model_tel_v0p2
<xarray.Dataset> Size: 26GB
Dimensions:     (time: 7272, node: 301572, face: 588314, max_no_vertices: 3)
Coordinates:
    longitude   (node) float32 1MB ...
    latitude    (node) float32 1MB ...
  * time        (time) datetime64[ns] 58kB 2023-01-01 ... 2023-10-30T23:00:00
Dimensions without coordinates: node, face, max_no_vertices
Data variables:
    u           (time, node) float32 9GB ...
    v           (time, node) float32 9GB ...
    elev        (time, node) float32 9GB ...
    face_nodes  (face, max_no_vertices) int32 7MB ...
Attributes:
    Conventions:  CF-1.0, UGRID-1.0
    title:        TELEMAC Model output
    source:       TELEMAC model output version vV8P4
    references:   https://gitlab.pam-retd.fr/otm/telemac-mascaret
    history:      created by pyposeidon
    comment:      TELEMAC Model output
    type:         TELEMAC Model output
# load json
import json
with open("./data/storms/cyclones_2023.json", "r") as f:
events = json.load(f)
global variables for the following plots
ds_2D = [model_tel_v0, model_tel_v0p2]
version2D = ["v0.0", "v0.2.0", "v0.2.1", "v2.0"]  # extra labels kept for model versions not loaded here
alpha = [1, 0.6, 0.4, 0.3, 0.3]
colors = ['blue', 'red', 'purple', 'brown', 'green']
useful functions
def is_overlapping(tris, meshx):
    # flag triangles that straddle the antimeridian (longitude jump > 180°)
    PIR = 180
    x1, x2, x3 = meshx[tris].T
    return np.logical_or(abs(x2 - x1) > PIR, abs(x3 - x1) > PIR)
def extract_max_elev(ds, times):
maxH = ds.elev.sel(time=times, method='nearest').max(dim='time')
return maxH
def plot_max_elev(ax, ds, times):
m = is_overlapping(ds.face_nodes,ds.longitude)
max_elev = extract_max_elev(ds,times)
im = ax.tricontourf(
ds.longitude,
ds.latitude,
ds.face_nodes[~m],
max_elev.values,
levels = np.arange(-0, 0.5, 0.02),
extend = 'both')
return im
def closest_n_points(nodes, N, meshXY, dist_max=np.inf):
    # indices (and distances) of the N mesh points closest to each node
    mytree = cKDTree(meshXY)
    d_, indice = mytree.query(nodes, range(1, N + 1))
    indice[d_ > dist_max] = -1
    mask = indice != -1
    return indice[mask].T, d_[mask].T
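For instance, a usage sketch that looks up the model node nearest to the Woods Hole gauge (coordinates taken from the station table above):
# nearest model node to one station (lon, lat)
meshXY = np.array([model_tel_v0p2.longitude.values, model_tel_v0p2.latitude.values]).T
idx, d_ = closest_n_points(np.array([-70.672, 41.523]), 1, meshXY)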
def extract_t_elev_1D(ds, seaset_id):
    idx_ds = np.where(ds.seaset_id == seaset_id)[0]
    if len(idx_ds) > 0:
        elev_ = ds.isel(node=idx_ds[0]).elev.values
        t_ = [pd.Timestamp(ti) for ti in ds.isel(node=idx_ds[0]).time.values]
    else:
        print(f"seaset_id: {seaset_id} not found in model")
        t_ = None; elev_ = None
    return pd.Series(elev_, index=t_)
def extract_t_elev_2D(ds, x, y):
lons, lats = ds.longitude.values, ds.latitude.values
indx, dist_ = closest_n_points(np.array([x, y]).T, 1, np.array([lons,lats]).T)
ds_ = ds.isel(node=indx[0])
elev_ = ds_.elev.values
t_ = [pd.Timestamp(ti) for ti in ds_.time.values]
return pd.Series(elev_, index=t_), np.round(dist_, 2)
def get_corr(ts1: pd.Series, ts2: pd.Series):
    # correlation on the common, NaN-free support of the two series
    ts1, ts2 = ts1.align(ts2, axis=0)
    ts1 = ts1.interpolate()
    nan_mask = np.logical_or(pd.isna(ts1).values, pd.isna(ts2).values)
    ts1 = ts1[~nan_mask]
    ts2 = ts2[~nan_mask]
    corr = ts1.corr(ts2)
    return np.round(corr, 2), ts1, ts2
def get_percentiles(ts1, ts2):
    # empirical quantiles, densified near the extreme tail
    x = np.arange(0, 0.99, 0.001)
    x = np.hstack([x, np.arange(0.99, 1, 0.0001)])
    pc1 = np.array([ts1.quantile(thd) for thd in x])
    pc2 = np.array([ts2.quantile(thd) for thd in x])
    return pc1, pc2
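Correlation alone can mask amplitude biases; a companion RMSE helper in the same spirit could be added (a sketch reusing get_corr for the alignment, not part of the original protocol):
def get_rmse(ts1: pd.Series, ts2: pd.Series):
    # root-mean-square error on the aligned, NaN-free overlap of the two series
    _, ts1, ts2 = get_corr(ts1, ts2)
    return np.round(np.sqrt(np.mean((ts1.values - ts2.values) ** 2)), 3)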
%matplotlib widget
for storm in events:
fig, ax = plt.subplots(1,1, figsize=(14,8))
x, y, time, wind_max, r64, name, basin = extract_specs(storm)
xmin, xmax = np.min(x), np.max(x)
ymin, ymax = np.min(y), np.max(y)
tmin = pd.Timestamp(events[storm]['start'])
tmax = pd.Timestamp(events[storm]['end'])
name = events[storm]['name']
im = plot_max_elev(ax, ds_2D[-1], pd.date_range(tmin, tmax, freq='1h'))
ax, im2 = plot_cyclone_track(x, y, wind_max, r64, name, ax = ax)
ax.axis('equal')
    if name == "BABET": xmin, xmax, ymin, ymax = -10, 20, 40, 70  # manual bounding box (BABET is not in IBTrACS)
ax.set_xlim(xmin - 5 , xmax + 5 )
ax.set_ylim(ymin - 5 , ymax + 5 )
ax.set_title(f"cyclone {name}, id#{storm} from {pd.Timestamp(tmin)} to {pd.Timestamp(tmax)}")
    plt.colorbar(im2, ax = ax, orientation = 'horizontal', pad = 0.07, fraction = 0.05, aspect = 60, label = 'max wind speed (knots)')
plt.colorbar(im, ax = ax, pad = 0.02, fraction = 0.05, label = 'max elevation (m)')
stations_impacted = subset_2023[subset_2023.ioc_code.isin(events[storm]['stations'])]
plt.tight_layout()
#
fig1, ax1 = plt.subplots(len(stations_impacted), 1, figsize=(14,3*len(stations_impacted)))
if len(stations_impacted) == 1: ax1 = [ax1]
for i_s, ioc_code in enumerate(stations_impacted.ioc_code):
s = stations_impacted.iloc[i_s]
xl, yl = s.longitude, s.latitude
ax.scatter(xl, yl,
s=100,
lw=0.5,
c = colors[i_s % len(colors)],
marker = "*",
edgecolors = 'white',
label = ioc_code)
obs = pd.read_parquet(SURGE_FOLDER + f"{ioc_code}.parquet")
seaset_id = s.seaset_id
# observations
obs = obs.loc[tmin:tmax]
ax1[i_s].plot(obs.index, obs.values , label=f"{ioc_code}", color = 'k', linestyle = '--')
# models
for ids, ds in enumerate(ds_2D):
mod , d_= extract_t_elev_2D(ds, xl, yl)
corr, ts1, ts2 = get_corr(mod, obs[obs.columns[0]])
ax1[i_s].plot(mod.index,mod.values , label=f"model 2D {version2D[ids]}", color = colors[i_s % len(colors)], alpha = alpha[ids])
ax1[i_s].set_xlim(tmin, tmax)
ax1[i_s].legend()
ax1[0].set_title(f"cyclone {name}, id#{storm}")
ax.legend()
plt.tight_layout()
print(storm, name)
13737 HILARY
13745 IDALIA
13751 LEE
13758 OPHELIA
13699 ELLIE
13712 ILSA
13708 JUDY
13709 KEVIN
13716 MAWAR
13730 KHANUN
13734 LAN
13762 KOINU
13765 BOLAVEN
-1 BABET
what about the whole period of simulation?
Some storms might not have been recorded in the IBTrACS database but can still induce a significant surge at some stations.
Let's check the signal for all the stations in the surge folder; the manually curated list used below could also be built automatically, as sketched next.
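A possible automatic screening, assuming we simply keep stations whose detided signal ever exceeds an arbitrary 0.5 m threshold (list_surge_auto is hypothetical and not used below):
# build a candidate list from the surge folder instead of typing it manually
list_surge_auto = []
for code in files_in_folder(SURGE_FOLDER, ext='.parquet'):
    obs = pd.read_parquet(SURGE_FOLDER + f"{code}.parquet")
    if obs[obs.columns[0]].abs().max() > 0.5:  # arbitrary 0.5 m threshold
        list_surge_auto.append(code)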
tmin = pd.Timestamp(2023,1,1)
tmax = pd.Timestamp(2023,10,31)
list_surge_manual = ['viti', 'prus', 'mill', 'pkem', 'vard', 'wood', 'kahu', 'cald2', 'hmda', 'kinl', 'benc', 'dzao',
'lerw2', 'live', 'cres', 'hie2', 'ptmt', 'ilfa', 'wick', 'rorv', 'pwil2', 'dkwa', 'bame', 'abed', 'lime', 'nshi',
'newl', 'chst', 'bamf', 'kush', 'greg', 'bamd', 'honn', 'dutc', 'talc2', 'dpnc', 'quir2', 'fpnt', 'chrp', 'ishig',
'bgct', 'vhbc', 'thev', 'npor', 'pslu', 'cher', 'boma', 'stjo', 'djve', 'ross', 'elak', 'hana', 'saig', 'pich2',
'dapi', 'ptal2', 'stor', 'ande', 'aren', 'ohig3', 'newl2', 'trst', 'kusm', 'alam', 'leit', 'sado', 'asto', 'shee',
'malo', 'corr2', 'prin2', 'nkfa', 'heys', 'helg', 'kawa', 'atka', 'qtro2', 'dove', 'waka', 'treg', 'cuxh', 'tosa',
'yaku', 'brom', 'smog', 'darw', 'cwfl', 'huat', 'sprg', 'fue2', 'abur', 'guam', 'kwfl', 'pcha2', 'barn',
'bapj', 'amal', 'hilo', 'wpwa', 'plym', 'coru', 'gokr', 'pmur', 'whit', 'nhav', 'porp', 'kungr', 'mumb', 'stqy2', 'crom',
'sitk', 'stqy', 'oste', 'acnj', 'bang', 'naga', 'mare', 'fuka', 'herb', 'pagb', 'work', 'mhav', 'lajo', 'harw',
'omae', 'coqu2', 'holy2', 'horn', 'sdpt', 'lowe', 'naha']
surge_stations = subset_2023[subset_2023.ioc_code.isin(list_surge_manual)]
countries = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
for i_s, ioc_code in tqdm.tqdm(enumerate(surge_stations.ioc_code)):
# get station coordinates
s = surge_stations.iloc[i_s]
xl, yl = s.longitude, s.latitude
# get observations
obs = pd.read_parquet(SURGE_FOLDER + f"{ioc_code}.parquet")
obs = obs.loc[tmin:tmax]
seaset_id = s.seaset_id
#
mod , d_= extract_t_elev_2D(ds_2D[-1], xl, yl) # we take the v0.2 results only
corr, ts2, ts1 = get_corr(mod, obs[obs.columns[0]])
    if corr > 0.0:  # if the coefficient is too low, we don't plot the station
fig = plt.figure(figsize=(16,5))
gs = fig.add_gridspec(1,3, width_ratios=(2, 1, 1),
left=0, right=0.99, bottom=0, top=0.99,
wspace=0.07, hspace=0.03)
ax_plot1 = fig.add_subplot(gs[ 0])
ax_plot2 = fig.add_subplot(gs[ 1], sharey = ax_plot1)
ax_map = fig.add_subplot(gs[ 2])
# models
        for ids, ds in enumerate(ds_2D[-1:], start=len(ds_2D) - 1):  # only the last model, so labels stay consistent
mod , d_= extract_t_elev_2D(ds, xl, yl)
corr, ts2, ts1 = get_corr(mod, obs[obs.columns[0]])
mod95 = mod.quantile(0.95)
corr95, _, _ = get_corr(mod[mod > mod95], obs[obs.columns[0]])
mod99 = mod.quantile(0.99)
corr99, _, _ = get_corr(mod[mod > mod99], obs[obs.columns[0]])
ax_plot1.plot(mod.index,mod.values , label=f"model {version2D[ids]}", color = colors[i_s % len(colors)], alpha = alpha[ids])
ax_plot2.scatter(ts1.values, ts2.values, c= 'k', label=f"model {version2D[ids]}, dist={d_}, Cr={corr}", s=1, alpha = 0.3)
ax_plot2.set_xlabel('measured data')
ax_plot2.set_ylabel('modelled data')
pc1, pc2 = get_percentiles(ts1, ts2)
ax_plot2.axline([0,0],slope = 1, lw =1, linestyle = '--', color = 'k')
ax_plot2.scatter(pc1, pc2, color = colors[i_s % len(colors)], alpha = alpha[ids])
ax_plot2.plot(pc1, pc2, color = colors[i_s % len(colors)], alpha = alpha[ids])
# observations
ax_plot1.plot(obs.index, obs.values , label=f"{ioc_code}", color = 'k', linestyle = '--')
ax_plot1.set_xlim(tmin, tmax)
ax_plot1.legend()
ax_plot2.legend()
ax_plot1.grid(axis='both', color = 'grey')
ax_plot2.grid(axis='both', color = 'grey')
# map
ax_map.scatter(xl, yl, marker = "*", c = 'r')
_ = countries.plot(color='lightgrey', ax=ax_map, zorder=-1)
ax_map.set_xlim(xl-20, xl+20)
ax_map.set_ylim(yl-20, yl+20)
121it [07:34, 3.75s/it]
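A possible follow-up would be to gather the per-station metrics into a single skill table (a sketch built on the helpers above; skill_table is a hypothetical name):
# collect correlation and node distance for every screened station
rows = []
for i_s, ioc_code in enumerate(surge_stations.ioc_code):
    s = surge_stations.iloc[i_s]
    obs = pd.read_parquet(SURGE_FOLDER + f"{ioc_code}.parquet").loc[tmin:tmax]
    mod, d_ = extract_t_elev_2D(ds_2D[-1], s.longitude, s.latitude)
    corr, _, _ = get_corr(mod, obs[obs.columns[0]])
    rows.append({'ioc_code': ioc_code, 'corr': corr, 'dist_deg': float(d_)})
skill_table = pd.DataFrame(rows).sort_values('corr', ascending=False)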