Protocol for skill assessment¶
The period of the skill assessment covers most of 2023: from 2023-01-01 to 2023-11-30.
This is the most energetic part of the year for cyclones, as shown in the next figure:
the beginning of the year was marked by cyclones in the South Indian Ocean,
whereas during the fall, cyclones were more frequent in the Pacific and North Atlantic oceans.
! mkdir -p data
! wget https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r00/access/netcdf/IBTrACS.ALL.v04r00.nc -O data/IBTrACS.ALL.v04r00.nc
--2024-04-30 12:44:17--  https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r00/access/netcdf/IBTrACS.ALL.v04r00.nc
Resolving www.ncei.noaa.gov (www.ncei.noaa.gov)... 205.167.25.167, 205.167.25.168, 205.167.25.172, ...
Connecting to www.ncei.noaa.gov (www.ncei.noaa.gov)|205.167.25.167|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26815169 (26M) [application/x-netcdf]
Saving to: ‘data/IBTrACS.ALL.v04r00.nc’

data/IBTrACS.ALL.v0 100%[===================>]  25,57M  2,19MB/s    in 9,2s

2024-04-30 12:44:27 (2,79 MB/s) - ‘data/IBTrACS.ALL.v04r00.nc’ saved [26815169/26815169]
# load IBTrACS data
import xarray as xr
import numpy as np
import pandas as pd
import tqdm
import glob
import os
import json
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from analysea.tide import detide
import geopandas as gpd
from scipy.spatial import cKDTree
cyclones = xr.open_dataset('data/IBTrACS.ALL.v04r00.nc')
cyclones
<xarray.Dataset> Size: 4GB
Dimensions:           (storm: 13806, date_time: 360, quadrant: 4)
Coordinates:
    time              (storm, date_time) datetime64[ns] 40MB ...
    lat               (storm, date_time) float32 20MB ...
    lon               (storm, date_time) float32 20MB ...
Dimensions without coordinates: storm, date_time, quadrant
Data variables: (12/147)
    numobs            (storm) float32 55kB ...
    sid               (storm) |S13 179kB ...
    season            (storm) float32 55kB ...
    number            (storm) int16 28kB ...
    basin             (storm, date_time) |S2 10MB ...
    subbasin          (storm, date_time) |S2 10MB ...
    ...                ...
    reunion_gust      (storm, date_time) float32 20MB ...
    reunion_gust_per  (storm, date_time) float32 20MB ...
    usa_seahgt        (storm, date_time) float32 20MB ...
    usa_searad        (storm, date_time, quadrant) float32 80MB ...
    storm_speed       (storm, date_time) float32 20MB ...
    storm_dir         (storm, date_time) float32 20MB ...
Attributes: (12/49)
    title:             IBTrACS - International Best Track Archive fo...
    summary:           The intent of the IBTrACS project is to overc...
    source:            The original data are tropical cyclone positi...
    Conventions:       ACDD-1.3
    Conventions_note:  Data are nearly CF-1.7 compliant. The sole is...
    product_version:   v04r00
    ...                ...
    history:           Sun Apr 28 04:18:28 2024: ncks --no_abc --cnk...
    license:           These data may be redistributed and used with...
    featureType:       trajectory
    cdm_data_type:     Trajectory
    comment:           The tracks of TCs generally look like a traje...
    NCO:               netCDF Operators version 4.8.1 (Homepage = ht...
# select only the 2023 assessment period
indice_storm_2023 = []
def is_in_period_2023(times):
    # keep storms with at least one record between 2023-01-01 and 2023-11-30
    test1 = [pd.Timestamp(t.decode()) < pd.Timestamp('2023-11-30') for t in times]
    test2 = [pd.Timestamp(t.decode()) > pd.Timestamp('2023-01-01') for t in times]
    test = np.logical_and(test1, test2)
    return np.any(test)
for i_storm in tqdm.tqdm(cyclones.storm.values[13300:]): # no need to loop over the 19th/20th centuries!
    times = cyclones.isel(storm=i_storm).iso_time.values
    if is_in_period_2023(times):
        indice_storm_2023.append(i_storm)
# takes ~20 s to run
100%|██████████| 506/506 [00:16<00:00, 30.12it/s]
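Note that IBTrACS also carries a per-storm season variable, so the loop above could be replaced with a vectorized selection. A minimal sketch, assuming season marks the cyclone season year (which, for the southern hemisphere, spans December to November — e.g. ELLIE belongs to season 2023 but starts in December 2022):
# vectorized alternative to the loop above (assumption: season == 2023)
indice_storm_2023_alt = np.where(cyclones.season.values == 2023)[0].tolist()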
def extract_specs(istorm):
    # extract track coordinates, times, winds and radii for one storm
    event = cyclones.isel(storm=int(istorm))
    x, y, time = np.array(event.lon), np.array(event.lat), np.array(event.time)
    mask = ~np.isnan(x)  # drop the padded (NaN) tail of the track
    x = x[mask]
    y = y[mask]
    time = time[mask]
    wind_max = np.array(event.usa_wind)[mask]  # max sustained wind, in knots
    # radius of maximum winds (usa_rmw), converted to degrees of longitude
    r64 = np.array(event.usa_rmw)[mask]/(111 * np.cos(np.deg2rad(y)))
    name = np.array(event.name)
    basin = np.array(event.basin)[mask]
    return x, y, time, wind_max, r64, name, basin
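Side note: usa_rmw is reported in nautical miles in IBTrACS, so the conversion above is only approximate. If exact radii matter, the 1.852 km/nmile factor can be included; nm_to_deg is a hypothetical helper, not used below:
# convert a radius from nautical miles to degrees of longitude at latitude lat_deg
# (1 nmile = 1.852 km, and one degree spans ~111 km at the equator)
def nm_to_deg(r_nm, lat_deg):
    return r_nm * 1.852 / (111.0 * np.cos(np.deg2rad(lat_deg)))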
def plot_cyclone_track(x, y, wind_max, r64, name, ax=None):
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    im = ax.scatter(x, y, c=wind_max, label=name, s=1, cmap='jet', vmin=0, vmax=100)
    # draw one hatched circle per track point, sized by the wind radius
    for ix in range(len(x)):
        circ = plt.Circle((x[ix], y[ix]),
                          radius=r64[ix],
                          fill=False,
                          hatch='////',
                          color=cm.jet(np.min([1, wind_max[ix]/100])),
                          alpha=0.3)
        ax.add_patch(circ)
    # ax.axis('equal')
    return ax, im
# check 2023 cyclone tracks
fig, ax = plt.subplots(1,1, figsize=(16,8))
for istorm in indice_storm_2023:
x, y, time, wind_max, r64, name, basin = extract_specs(istorm)
ax, im = plot_cyclone_track(x, y, wind_max, r64, name, ax)
    if max(wind_max) > 100:  # report only the strongest storms (> 100 kt)
        print(f"index IBTrACS #{istorm}, name: {name}, max wind {np.max(wind_max)}, start: {np.min(time)} end: {np.max(time)}")
plt.colorbar(im, ax = ax, orientation = 'horizontal', fraction = 0.05, aspect = 60)
plt.tight_layout()
index IBTrACS #13704, name: b'FREDDY', max wind 138.0, start: 2023-02-06T06:00:00.000039936 end: 2023-02-24T06:00:00.000039936
index IBTrACS #13708, name: b'JUDY', max wind 109.0, start: 2023-02-27T00:00:00.000039936 end: 2023-03-04T18:00:00.000039936
index IBTrACS #13709, name: b'KEVIN', max wind 134.0, start: 2023-03-01T18:00:00.000039936 end: 2023-03-07T06:00:00.000039936
index IBTrACS #13711, name: b'HERMAN', max wind 128.0, start: 2023-03-29T12:00:00.000039936 end: 2023-04-02T12:00:00.000039936
index IBTrACS #13712, name: b'ILSA', max wind 128.0, start: 2023-04-08T18:00:00.000039936 end: 2023-04-13T18:00:00.000039936
index IBTrACS #13714, name: b'MOCHA', max wind 138.0, start: 2023-05-11T00:00:00.000039936 end: 2023-05-14T18:00:00.000039936
index IBTrACS #13716, name: b'MAWAR', max wind 159.0, start: 2023-05-20T00:00:00.000039936 end: 2023-06-03T06:00:00.000039936
index IBTrACS #13718, name: b'BIPARJOY', max wind 105.0, start: 2023-06-06T06:00:00.000039936 end: 2023-06-16T12:00:00.000039936
index IBTrACS #13725, name: b'CALVIN', max wind 110.0, start: 2023-07-09T00:00:00.000039936 end: 2023-07-19T18:00:00.000039936
index IBTrACS #13729, name: b'DOKSURI', max wind 128.0, start: 2023-07-21T12:00:00.000039936 end: 2023-07-28T06:00:00.000039936
index IBTrACS #13730, name: b'KHANUN', max wind nan, start: 2023-07-27T06:00:00.000039936 end: 2023-08-10T18:00:00.000039936
index IBTrACS #13731, name: b'DORA', max wind 125.0, start: 2023-07-28T18:00:00.000039936 end: 2023-08-16T00:00:00.000039936
index IBTrACS #13734, name: b'LAN', max wind 115.0, start: 2023-08-07T18:00:00.000039936 end: 2023-08-17T12:00:00.000039936
index IBTrACS #13735, name: b'FERNANDA', max wind 115.0, start: 2023-08-10T00:00:00.000039936 end: 2023-08-17T12:00:00.000039936
index IBTrACS #13737, name: b'HILARY', max wind 125.0, start: 2023-08-13T18:00:00.000039936 end: 2023-08-21T18:00:00.000039936
index IBTrACS #13740, name: b'FRANKLIN', max wind 130.0, start: 2023-08-19T18:00:00.000039936 end: 2023-09-09T18:00:00.000039936
index IBTrACS #13744, name: b'SAOLA', max wind 134.0, start: 2023-08-23T18:00:00.000039936 end: 2023-09-03T12:00:00.000039936
index IBTrACS #13745, name: b'IDALIA', max wind 115.0, start: 2023-08-26T12:00:00.000039936 end: 2023-09-08T06:00:00.000039936
index IBTrACS #13746, name: b'HAIKUI', max wind 105.0, start: 2023-08-28T12:00:00.000039936 end: 2023-09-04T18:00:00.000039936
index IBTrACS #13750, name: b'JOVA', max wind 140.0, start: 2023-09-02T00:00:00.000039936 end: 2023-09-10T18:00:00.000039936
index IBTrACS #13751, name: b'LEE', max wind 145.0, start: 2023-09-05T12:00:00.000039936 end: 2023-09-18T18:00:00.000039936
index IBTrACS #13762, name: b'KOINU', max wind 119.0, start: 2023-09-29T12:00:00.000039936 end: 2023-10-10T00:00:00.000039936
index IBTrACS #13763, name: b'LIDIA', max wind 120.0, start: 2023-09-30T18:00:00.000039936 end: 2023-10-11T06:00:00.000039936
index IBTrACS #13765, name: b'BOLAVEN', max wind 154.0, start: 2023-10-07T00:00:00.000039936 end: 2023-10-14T12:00:00.000039936
index IBTrACS #13767, name: b'NORMA', max wind 115.0, start: 2023-10-15T00:00:00.000039936 end: 2023-10-23T12:00:00.000039936
index IBTrACS #13768, name: b'OTIS', max wind 145.0, start: 2023-10-17T18:00:00.000039936 end: 2023-10-25T18:00:00.000039936
index IBTrACS #13771, name: b'TEJ', max wind 109.0, start: 2023-10-20T12:00:00.000039936 end: 2023-10-24T06:00:00.000039936
index IBTrACS #13772, name: b'LOLA', max wind 119.0, start: 2023-10-21T18:00:00.000039936 end: 2023-10-26T00:00:00.000039936
2 - compare the cyclone tracks with tide gauge locations¶
2.1 - get clean tide gauges¶
The clean tide gauge records come from ioc_cleanup: https://github.com/seareport/ioc_cleanup
# get clean tide gauges
def files_in_folder(folder, ext='.csv'):
    # return the file names (without extension) found in `folder`
    list_files = []
    for item in glob.glob(folder + '*'):
        if item.endswith(ext):
            name = os.path.split(item[:-len(ext)])[1]
            list_files.append(name)
    return list_files
CLEAN_FOLDER = "/home/tomsail/work/python/seareport_org/ioc_cleanup/clean/"
list_TG_2023 = files_in_folder(CLEAN_FOLDER, ext = '.parquet')
len(list_TG_2023)
182
2.2 - identify tide gauges with SEASET¶
## get SEASET stations -- latest version
seaset_full = pd.read_csv("/home/tomsail/Documents/work/python/oceanmodelling/seaset/Notebooks/catalog_full_updated.csv", index_col=0)
# some IOC codes were replaced for the same location, e.g. prin/prin2
def is_similar_station(ioc_code, station_list):
return any(station.startswith(ioc_code) for station in station_list)
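A quick illustration of the matching rule (hypothetical station lists):
# the retired code 'prin' still matches the clean record 'prin2'
assert is_similar_station('prin', ['prin2', 'wood'])
assert not is_similar_station('abcd', ['prin2', 'wood'])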
ioc_seaset = seaset_full.dropna(subset='ioc_code')
subset_2023 = ioc_seaset[ioc_seaset['ioc_code'].apply(is_similar_station, station_list=list_TG_2023)]
subset_2023
Station_Name | ioc_code | gloss_id | latitude | longitude | Country | connection | contacts | dcp_id | last_observation_level | ... | number_of_years | time_zone_hours | datum_information | instrument | precision | null_value | gauge_type | overall_record_quality | gesla3_id | seaset_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
125 | Base O'Higgins | ohig | NaN | -63.321 | -57.901 | Antarctica | SXCH40 | Servicio Hidrográfico y Oceanográfico de la Ar... | ADC04BE6 | 1.75 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 125 |
135 | Puerto Deseado | dese | 190.0 | -47.754 | -65.915 | Argentina | SEPO40 | Armada Argentina Servicio de Hidrografía Naval... | 33912088 | 3.69 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 135 |
136 | Puerto Madryn | madry | 191.0 | -42.763 | -65.031 | Argentina | SEPO40 | Armada Argentina Servicio de Hidrografía Naval... | 335665D2 | 6.60 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 136 |
139 | Battery Point | bapj | NaN | -42.892 | 147.338 | Australia | SZAU01 | National Tidal Centre/Australian Bureau of Met... | 61221 | 0.91 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 139 |
140 | Broome | brom | 40.0 | -18.001 | 122.219 | Australia | SZAU01 | National Tidal Centre/Australian Bureau of Met... | 62650 | 7.69 | ... | 32.0 | 0.0 | Unspecified | Unspecified | Unspecified | -99.9999 | Coastal | No obvious issues | Broome | 140 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5451 | Woods Hole, MA | wood | NaN | 41.523 | -70.672 | USA | web | National Ocean Service-NOAA ( USA ) | 3340A79C | 1.08 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5451 |
5462 | Yakutat | yaku | NaN | 59.548 | -139.735 | USA | web | National Ocean Service-NOAA ( USA ) | 336B0522 | 2.04 | ... | 61.0 | 0.0 | Station Datum | Unspecified | Unspecified | -99.9999 | Coastal | No obvious issues | Yakutat | 5462 |
6129 | Colombo | colo | 33.0 | 6.941 | 79.850 | Sri Lanka | SWIO40 | National Aquatic Resources Research and Develo... | 0650018E | 1.39 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6129 |
6141 | DART New Zealand E Coast | dnze | NaN | -36.049 | -177.708 | New Zealand | web | GNS Science ( New Zealand ) | 5401001 | 5776.60 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6141 |
6161 | Prince Rupert | prin2 | 155.0 | 54.317 | -130.317 | Canada | web | Fisheries and Oceans Canada ( Canada ) | NaN | 6.28 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6161 |
169 rows × 77 columns
2.3 - refine to stations which are in the vicinity of the cyclone tracks¶
dist_max = 5 # in degrees
def dist(lon1, lat1, lon2, lat2):
    # crude Euclidean distance in degrees (enough for a coarse screening)
    return np.sqrt((lat2 - lat1) ** 2 + (lon2 - lon1) ** 2)
def is_close_station(station, lons, lats, dist_max=5):
    lons[lons > 180] = lons[lons > 180] - 360  # wrap longitudes to [-180, 180]
    lon, lat = station
    return np.any(dist(lon, lat, lons, lats) < dist_max)
def get_tracks(cyclone_data):
lons = []
lats = []
for storm in cyclone_data.storm.values:
lons.extend(cyclone_data.isel(storm=storm).lon.values)
lats.extend(cyclone_data.isel(storm=storm).lat.values)
return lons, lats
def subset_from_tracks(df, lons, lats, dist_max=5):
    # keep the stations of `df` lying within `dist_max` degrees of the track points
    close_stations = df[df
        .apply(
            lambda row: is_close_station(
                (row['longitude'], row['latitude']),
                np.array(lons),
                np.array(lats),
                dist_max=dist_max
            ),
            axis=1
        )]
    return close_stations
def subset_from_cyclone(df, cyclone_data, dist_max=5):
    lons, lats = get_tracks(cyclone_data)
    return subset_from_tracks(df, lons, lats, dist_max=dist_max)
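The degree-space distance above stretches with latitude; if great-circle distances are preferred, a haversine variant could be swapped into is_close_station. A sketch, not used for the results below:
# great-circle distance in km between one point and arrays of points
def haversine_km(lon1, lat1, lon2, lat2):
    lon1, lat1, lon2, lat2 = map(np.deg2rad, (lon1, lat1, lon2, lat2))
    a = np.sin((lat2 - lat1) / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    return 2 * 6371.0 * np.arcsin(np.sqrt(a))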
# superpose with 2023 cyclone tracks
fig, ax = plt.subplots(1,1, figsize=(16,8))
for istorm in indice_storm_2023:
x, y, time, wind_max, r64, name, basin = extract_specs(istorm)
ax, im = plot_cyclone_track(x, y, wind_max, r64, name, ax)
stations_impacted_all = subset_from_cyclone(subset_2023, cyclones.isel(storm=indice_storm_2023))
stations_impacted_all.plot.scatter(x='longitude', y='latitude', ax=ax, s= 100, c = 'r', marker = "*", edgecolor = 'k')
<Axes: xlabel='longitude', ylabel='latitude'>
2.4 - split the analysis into basins¶
the basins in IBTrACS are the following:
basins = {
"East Pacific": 'EP',
"North Atlantic":'NA',
"North Indian": 'NI',
"South Indian": 'SI',
"South Pacific": 'SP',
"West Pacific": 'WP',
}
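As a quick sanity check, the 2023 storms can be tallied per basin with these codes (a sketch reusing extract_specs; not part of the protocol itself):
# count how many 2023 storms touch each basin
from collections import Counter
basin_counts = Counter()
for istorm in indice_storm_2023:
    *_, basins_ = extract_specs(istorm)
    basin_counts.update(set(b.decode() for b in basins_))
print(basin_counts)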
out = dict()
for basin, zone in basins.items():
    for istorm in indice_storm_2023:
        x, y, time, wind_max, r64, name, basins_ = extract_specs(istorm)
        list_ = [b.decode() for b in basins_]
        if zone in list_:
            stations_impacted = subset_from_tracks(subset_2023, x, y)
            stations_impacted.plot.scatter(x='longitude', y='latitude', ax=ax, s=100, c='r', marker="*", edgecolor='k')
            if len(stations_impacted) > 0:
                params = {
                    'name': name.item().decode(),  # decode the byte literal
                    'start': pd.Timestamp(time[0]).strftime('%Y-%m-%d %H:%M:%S'),
                    'end': pd.Timestamp(time[-1]).strftime('%Y-%m-%d %H:%M:%S'),
                    'stations': [c_ for c_ in stations_impacted.ioc_code.values]
                }
                out[str(istorm)] = params
                print(params)
with open('stations_impacted_2023.json', 'w') as f:
    json.dump(out, f, indent=2)
{'name': 'BEATRIZ', 'start': '2023-06-26 12:00:00', 'end': '2023-07-01 12:00:00', 'stations': ['huat']}
{'name': 'CALVIN', 'start': '2023-07-09 00:00:00', 'end': '2023-07-19 18:00:00', 'stations': ['hilo', 'kahu', 'kawa']}
{'name': 'DORA', 'start': '2023-07-28 18:00:00', 'end': '2023-08-16 00:00:00', 'stations': ['huat']}
{'name': 'EUGENE', 'start': '2023-08-03 00:00:00', 'end': '2023-08-07 18:00:00', 'stations': ['huat']}
{'name': 'HILARY', 'start': '2023-08-13 18:00:00', 'end': '2023-08-21 18:00:00', 'stations': ['alam', 'fpnt', 'lajo', 'pslu']}
{'name': 'JOVA', 'start': '2023-09-02 00:00:00', 'end': '2023-09-10 18:00:00', 'stations': ['huat']}
{'name': 'MAX', 'start': '2023-10-05 00:00:00', 'end': '2023-10-10 06:00:00', 'stations': ['huat']}
{'name': 'NORMA', 'start': '2023-10-15 00:00:00', 'end': '2023-10-23 12:00:00', 'stations': ['huat']}
{'name': 'OTIS', 'start': '2023-10-17 18:00:00', 'end': '2023-10-25 18:00:00', 'stations': ['huat']}
{'name': 'PILAR', 'start': '2023-10-24 12:00:00', 'end': '2023-11-06 00:00:00', 'stations': ['huat']}
{'name': 'ARLENE', 'start': '2023-05-31 18:00:00', 'end': '2023-06-04 06:00:00', 'stations': ['cwfl', 'kwfl']}
{'name': 'BRET', 'start': '2023-06-19 06:00:00', 'end': '2023-06-24 18:00:00', 'stations': ['lime']}
{'name': 'GERT', 'start': '2023-08-19 00:00:00', 'end': '2023-09-04 12:00:00', 'stations': ['lime']}
{'name': 'FRANKLIN', 'start': '2023-08-19 18:00:00', 'end': '2023-09-09 18:00:00', 'stations': ['amal', 'lime']}
{'name': 'IDALIA', 'start': '2023-08-26 12:00:00', 'end': '2023-09-08 06:00:00', 'stations': ['benc', 'cwfl', 'dpnc', 'kwfl']}
{'name': 'LEE', 'start': '2023-09-05 12:00:00', 'end': '2023-09-18 18:00:00', 'stations': ['stjo', 'bame', 'boma', 'amal', 'wood']}
{'name': 'OPHELIA', 'start': '2023-09-21 12:00:00', 'end': '2023-09-24 18:00:00', 'stations': ['acnj', 'bamd', 'benc', 'bgct', 'dpnc']}
{'name': 'PHILIPPE', 'start': '2023-09-23 06:00:00', 'end': '2023-10-06 12:00:00', 'stations': ['amal', 'lime']}
{'name': 'TAMMY', 'start': '2023-10-18 18:00:00', 'end': '2023-10-31 18:00:00', 'stations': ['amal', 'lime']}
{'name': 'ELLIE', 'start': '2022-12-22 06:00:00', 'end': '2023-01-06 18:00:00', 'stations': ['brom', 'darw']}
{'name': 'CHENESO', 'start': '2023-01-17 18:00:00', 'end': '2023-01-30 06:00:00', 'stations': ['dzao']}
{'name': 'FREDDY', 'start': '2023-02-06 06:00:00', 'end': '2023-02-24 06:00:00', 'stations': ['djve']}
{'name': 'ILSA', 'start': '2023-04-08 18:00:00', 'end': '2023-04-13 18:00:00', 'stations': ['brom', 'djve', 'darw']}
{'name': 'HALE', 'start': '2023-01-06 18:00:00', 'end': '2023-01-08 06:00:00', 'stations': ['ross', 'mare']}
{'name': 'IRENE', 'start': '2023-01-18 06:00:00', 'end': '2023-01-19 12:00:00', 'stations': ['mare']}
{'name': 'NOT_NAMED', 'start': '2023-01-20 18:00:00', 'end': '2023-01-21 12:00:00', 'stations': ['mare']}
{'name': 'JUDY', 'start': '2023-02-27 00:00:00', 'end': '2023-03-04 18:00:00', 'stations': ['mare']}
{'name': 'KEVIN', 'start': '2023-03-01 18:00:00', 'end': '2023-03-07 06:00:00', 'stations': ['mare']}
{'name': 'LOLA', 'start': '2023-10-21 18:00:00', 'end': '2023-10-26 00:00:00', 'stations': ['mare']}
{'name': 'MAL', 'start': '2023-11-13 00:00:00', 'end': '2023-11-16 00:00:00', 'stations': ['viti']}
{'name': 'SANVU', 'start': '2023-04-19 12:00:00', 'end': '2023-04-22 06:00:00', 'stations': ['dkwa']}
{'name': 'MAWAR', 'start': '2023-05-20 00:00:00', 'end': '2023-06-03 06:00:00', 'stations': ['abur', 'ishig', 'kusm', 'naga', 'naha', 'omae', 'tosa', 'guam', 'pagb']}
{'name': 'GUCHOL', 'start': '2023-06-06 00:00:00', 'end': '2023-06-13 12:00:00', 'stations': ['kusm', 'naha', 'omae']}
{'name': 'KHANUN', 'start': '2023-07-27 06:00:00', 'end': '2023-08-10 18:00:00', 'stations': ['abur', 'hmda', 'ishig', 'naga', 'naha', 'saig', 'tosa']}
{'name': 'DORA', 'start': '2023-07-28 18:00:00', 'end': '2023-08-16 00:00:00', 'stations': ['huat']}
{'name': 'LAN', 'start': '2023-08-07 18:00:00', 'end': '2023-08-17 12:00:00', 'stations': ['abur', 'fuka', 'hmda', 'hana', 'kusm', 'kush', 'omae', 'sado', 'saig', 'tosa', 'waka']}
{'name': 'DAMREY', 'start': '2023-08-23 06:00:00', 'end': '2023-08-28 18:00:00', 'stations': ['hana', 'kush']}
{'name': 'SAOLA', 'start': '2023-08-23 18:00:00', 'end': '2023-09-03 12:00:00', 'stations': ['ishig']}
{'name': 'HAIKUI', 'start': '2023-08-28 12:00:00', 'end': '2023-09-04 18:00:00', 'stations': ['ishig', 'naha']}
{'name': 'KIROGI', 'start': '2023-08-30 00:00:00', 'end': '2023-09-04 18:00:00', 'stations': ['abur', 'hmda', 'kusm', 'naga', 'omae', 'saig', 'tosa', 'dkwa']}
{'name': 'YUN-YEUNG', 'start': '2023-09-05 18:00:00', 'end': '2023-09-08 18:00:00', 'stations': ['kusm', 'omae', 'sado', 'tosa']}
{'name': 'KOINU', 'start': '2023-09-29 12:00:00', 'end': '2023-10-10 00:00:00', 'stations': ['ishig']}
{'name': 'BOLAVEN', 'start': '2023-10-07 00:00:00', 'end': '2023-10-14 12:00:00', 'stations': ['dkwa', 'guam', 'pagb']}
3 - detide the selected stations¶
create a folder for the detided stations first
! mkdir -p data/surge
SURGE_FOLDER = "./data/surge/"
for ii,ioc_code in enumerate(subset_2023.ioc_code):
lat = subset_2023.iloc[ii].latitude
if not os.path.exists(SURGE_FOLDER + f"{ioc_code}.parquet"):
df = pd.read_parquet(CLEAN_FOLDER + f"{ioc_code}.parquet")
surge = detide(df[df.columns[0]], lat=lat, resample_detide = True)
surge.to_frame().to_parquet(SURGE_FOLDER + f"{ioc_code}.parquet")
# ~1 hour per station without resampling; ~30 s with resample_detide=True
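For reference, detiding conceptually amounts to fitting tidal constituents to the observed signal and subtracting the reconstructed tide. A minimal sketch with utide directly, assuming an hourly sea-level Series sl and a station latitude lat (this is not the actual analysea implementation):
# harmonic analysis of the observed series, then removal of the predicted tide
import utide
from matplotlib.dates import date2num
t = date2num(sl.index.to_pydatetime())
coef = utide.solve(t, sl.values, lat=lat)   # fit tidal constituents
tide = utide.reconstruct(t, coef)           # predicted tidal signal
surge = pd.Series(sl.values - tide.h, index=sl.index)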
4 - compare with model results¶
BASE = "/home/tomsail/Documents/work/python/pyPoseidon/Tutorial/models/"
v0 = "/home/tomsail/work/python/pyPoseidon/Tutorial/models/v0.0/telemac/results_2D.nc"
v0p2 = "/home/tomsail/work/python/pyPoseidon/Tutorial/models/v0.2/telemac/results_2D.nc"
# v2p0 = "/home/tomsail/work/models/results/global-v2/202307_2D_tri.zarr"
## load result file -- 2D
model_tel_v0 = xr.open_dataset(v0)
model_tel_v0p2 = xr.open_dataset(v0p2)
# model_tel_v2 = xr.open_dataset(v2p0)
model_tel_v0p2
<xarray.Dataset> Size: 26GB
Dimensions:     (time: 7272, node: 301572, face: 588314, max_no_vertices: 3)
Coordinates:
    longitude   (node) float32 1MB ...
    latitude    (node) float32 1MB ...
  * time        (time) datetime64[ns] 58kB 2023-01-01 ... 2023-10-30T23:00:00
Dimensions without coordinates: node, face, max_no_vertices
Data variables:
    u           (time, node) float32 9GB ...
    v           (time, node) float32 9GB ...
    elev        (time, node) float32 9GB ...
    face_nodes  (face, max_no_vertices) int32 7MB ...
Attributes:
    Conventions:  CF-1.0, UGRID-1.0
    title:        TELEMAC Model output
    source:       TELEMAC model output version vV8P4
    references:   https://gitlab.pam-retd.fr/otm/telemac-mascaret
    history:      created by pyposeidon
    comment:      TELEMAC Model output
    type:         TELEMAC Model output
# load json
import json
with open("./data/storms/cyclones_2023.json", "r") as f:
events = json.load(f)
global variables for the following plots
ds_2D = [model_tel_v0, model_tel_v0p2]
version2D = ["v0.0", "v0.2.0", "v0.2.1", "v2.0"]  # extra labels kept for model versions not loaded here
alpha = [1, 0.6, 0.4, 0.3, 0.3]
colors = ['blue', 'red', 'purple', 'brown', 'green']
useful functions
def is_overlapping(tris, meshx):
    # flag triangles that straddle the antimeridian (longitude jump > 180°)
    PIR = 180
    x1, x2, x3 = meshx[tris].T
    return np.logical_or(abs(x2 - x1) > PIR, abs(x3 - x1) > PIR)
def extract_max_elev(ds, times):
maxH = ds.elev.sel(time=times, method='nearest').max(dim='time')
return maxH
def plot_max_elev(ax, ds, times):
m = is_overlapping(ds.face_nodes,ds.longitude)
max_elev = extract_max_elev(ds,times)
im = ax.tricontourf(
ds.longitude,
ds.latitude,
ds.face_nodes[~m],
max_elev.values,
levels = np.arange(-0, 0.5, 0.02),
extend = 'both')
return im
def closest_n_points(nodes, N, meshXY, dist_max=np.inf):
    # indices (and distances) of the N mesh points closest to each node
    mytree = cKDTree(meshXY)
    d_, indice = mytree.query(nodes, range(1, N + 1))
    indice[d_ > dist_max] = -1
    mask = indice != -1
    return indice[mask].T, d_[mask].T
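For instance, a usage sketch that looks up the model node nearest to the Woods Hole gauge (coordinates taken from the station table above):
# nearest model node to one station (lon, lat)
meshXY = np.array([model_tel_v0p2.longitude.values, model_tel_v0p2.latitude.values]).T
idx, d_ = closest_n_points(np.array([-70.672, 41.523]), 1, meshXY)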
def extract_t_elev_1D(ds, seaset_id):
    idx_ds = np.where(ds.seaset_id == seaset_id)[0]
    if len(idx_ds) > 0:
        elev_ = ds.isel(node=idx_ds[0]).elev.values
        t_ = [pd.Timestamp(ti) for ti in ds.isel(node=idx_ds[0]).time.values]
    else:
        print(f"seaset_id: {seaset_id} not found in model")
        t_ = None; elev_ = None
    return pd.Series(elev_, index=t_)
def extract_t_elev_2D(ds, x, y):
lons, lats = ds.longitude.values, ds.latitude.values
indx, dist_ = closest_n_points(np.array([x, y]).T, 1, np.array([lons,lats]).T)
ds_ = ds.isel(node=indx[0])
elev_ = ds_.elev.values
t_ = [pd.Timestamp(ti) for ti in ds_.time.values]
return pd.Series(elev_, index=t_), np.round(dist_, 2)
def get_corr(ts1: pd.Series, ts2: pd.Series):
    # correlation on the common, NaN-free support of the two series
    ts1, ts2 = ts1.align(ts2, axis=0)
    ts1 = ts1.interpolate()
    nan_mask = np.logical_or(pd.isna(ts1).values, pd.isna(ts2).values)
    ts1 = ts1[~nan_mask]
    ts2 = ts2[~nan_mask]
    corr = ts1.corr(ts2)
    return np.round(corr, 2), ts1, ts2
def get_percentiles(ts1, ts2):
    # empirical quantiles, densified near the extreme tail
    x = np.arange(0, 0.99, 0.001)
    x = np.hstack([x, np.arange(0.99, 1, 0.0001)])
    pc1 = np.array([ts1.quantile(thd) for thd in x])
    pc2 = np.array([ts2.quantile(thd) for thd in x])
    return pc1, pc2
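Correlation alone can mask amplitude biases; a companion RMSE helper in the same spirit could be added (a sketch reusing get_corr for the alignment, not part of the original protocol):
def get_rmse(ts1: pd.Series, ts2: pd.Series):
    # root-mean-square error on the aligned, NaN-free overlap of the two series
    _, ts1, ts2 = get_corr(ts1, ts2)
    return np.round(np.sqrt(np.mean((ts1.values - ts2.values) ** 2)), 3)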
%matplotlib widget
for storm in events:
fig, ax = plt.subplots(1,1, figsize=(14,8))
x, y, time, wind_max, r64, name, basin = extract_specs(storm)
xmin, xmax = np.min(x), np.max(x)
ymin, ymax = np.min(y), np.max(y)
tmin = pd.Timestamp(events[storm]['start'])
tmax = pd.Timestamp(events[storm]['end'])
name = events[storm]['name']
im = plot_max_elev(ax, ds_2D[-1], pd.date_range(tmin, tmax, freq='1h'))
ax, im2 = plot_cyclone_track(x, y, wind_max, r64, name, ax = ax)
ax.axis('equal')
    if name == "BABET": xmin, xmax, ymin, ymax = -10, 20, 40, 70  # manual bounding box (BABET is not in IBTrACS)
ax.set_xlim(xmin - 5 , xmax + 5 )
ax.set_ylim(ymin - 5 , ymax + 5 )
ax.set_title(f"cyclone {name}, id#{storm} from {pd.Timestamp(tmin)} to {pd.Timestamp(tmax)}")
    plt.colorbar(im2, ax = ax, orientation = 'horizontal', pad = 0.07, fraction = 0.05, aspect = 60, label = 'max wind speed (knots)')
plt.colorbar(im, ax = ax, pad = 0.02, fraction = 0.05, label = 'max elevation (m)')
stations_impacted = subset_2023[subset_2023.ioc_code.isin(events[storm]['stations'])]
plt.tight_layout()
#
fig1, ax1 = plt.subplots(len(stations_impacted), 1, figsize=(14,3*len(stations_impacted)))
if len(stations_impacted) == 1: ax1 = [ax1]
for i_s, ioc_code in enumerate(stations_impacted.ioc_code):
s = stations_impacted.iloc[i_s]
xl, yl = s.longitude, s.latitude
ax.scatter(xl, yl,
s=100,
lw=0.5,
c = colors[i_s % len(colors)],
marker = "*",
edgecolors = 'white',
label = ioc_code)
obs = pd.read_parquet(SURGE_FOLDER + f"{ioc_code}.parquet")
seaset_id = s.seaset_id
# observations
obs = obs.loc[tmin:tmax]
ax1[i_s].plot(obs.index, obs.values , label=f"{ioc_code}", color = 'k', linestyle = '--')
# models
for ids, ds in enumerate(ds_2D):
mod , d_= extract_t_elev_2D(ds, xl, yl)
corr, ts1, ts2 = get_corr(mod, obs[obs.columns[0]])
ax1[i_s].plot(mod.index,mod.values , label=f"model 2D {version2D[ids]}", color = colors[i_s % len(colors)], alpha = alpha[ids])
ax1[i_s].set_xlim(tmin, tmax)
ax1[i_s].legend()
ax1[0].set_title(f"cyclone {name}, id#{storm}")
ax.legend()
plt.tight_layout()
print(storm, name)
13737 HILARY
13745 IDALIA
13751 LEE
13758 OPHELIA
13699 ELLIE
13712 ILSA
13708 JUDY
13709 KEVIN
13716 MAWAR
13730 KHANUN
13734 LAN
13762 KOINU
13765 BOLAVEN
-1 BABET
what about the whole period of simulation?
Some storms might not have been recorded in the IBTrACS database but can still induce a significant surge at some stations.
Let's check the signal for all the stations in the surge folder; the manually curated list used below could also be built automatically, as sketched next.
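A possible automatic screening, assuming we simply keep stations whose detided signal ever exceeds an arbitrary 0.5 m threshold (list_surge_auto is hypothetical and not used below):
# build a candidate list from the surge folder instead of typing it manually
list_surge_auto = []
for code in files_in_folder(SURGE_FOLDER, ext='.parquet'):
    obs = pd.read_parquet(SURGE_FOLDER + f"{code}.parquet")
    if obs[obs.columns[0]].abs().max() > 0.5:  # arbitrary 0.5 m threshold
        list_surge_auto.append(code)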
tmin = pd.Timestamp(2023,1,1)
tmax = pd.Timestamp(2023,10,31)
list_surge_manual = ['viti', 'prus', 'mill', 'pkem', 'vard', 'wood', 'kahu', 'cald2', 'hmda', 'kinl', 'benc', 'dzao',
'lerw2', 'live', 'cres', 'hie2', 'ptmt', 'ilfa', 'wick', 'rorv', 'pwil2', 'dkwa', 'bame', 'abed', 'lime', 'nshi',
'newl', 'chst', 'bamf', 'kush', 'greg', 'bamd', 'honn', 'dutc', 'talc2', 'dpnc', 'quir2', 'fpnt', 'chrp', 'ishig',
'bgct', 'vhbc', 'thev', 'npor', 'pslu', 'cher', 'boma', 'stjo', 'djve', 'ross', 'elak', 'hana', 'saig', 'pich2',
'dapi', 'ptal2', 'stor', 'ande', 'aren', 'ohig3', 'newl2', 'trst', 'kusm', 'alam', 'leit', 'sado', 'asto', 'shee',
'malo', 'corr2', 'prin2', 'nkfa', 'heys', 'helg', 'kawa', 'atka', 'qtro2', 'dove', 'waka', 'treg', 'cuxh', 'tosa',
'yaku', 'brom', 'smog', 'darw', 'cwfl', 'huat', 'sprg', 'fue2', 'abur', 'guam', 'kwfl', 'pcha2', 'barn',
'bapj', 'amal', 'hilo', 'wpwa', 'plym', 'coru', 'gokr', 'pmur', 'whit', 'nhav', 'porp', 'kungr', 'mumb', 'stqy2', 'crom',
'sitk', 'stqy', 'oste', 'acnj', 'bang', 'naga', 'mare', 'fuka', 'herb', 'pagb', 'work', 'mhav', 'lajo', 'harw',
'omae', 'coqu2', 'holy2', 'horn', 'sdpt', 'lowe', 'naha']
surge_stations = subset_2023[subset_2023.ioc_code.isin(list_surge_manual)]
countries = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
for i_s, ioc_code in tqdm.tqdm(enumerate(surge_stations.ioc_code)):
# get station coordinates
s = surge_stations.iloc[i_s]
xl, yl = s.longitude, s.latitude
# get observations
obs = pd.read_parquet(SURGE_FOLDER + f"{ioc_code}.parquet")
obs = obs.loc[tmin:tmax]
seaset_id = s.seaset_id
#
mod , d_= extract_t_elev_2D(ds_2D[-1], xl, yl) # we take the v0.2 results only
corr, ts2, ts1 = get_corr(mod, obs[obs.columns[0]])
    if corr > 0.0:  # if the coefficient is too low, we don't plot the station
fig = plt.figure(figsize=(16,5))
gs = fig.add_gridspec(1,3, width_ratios=(2, 1, 1),
left=0, right=0.99, bottom=0, top=0.99,
wspace=0.07, hspace=0.03)
ax_plot1 = fig.add_subplot(gs[ 0])
ax_plot2 = fig.add_subplot(gs[ 1], sharey = ax_plot1)
ax_map = fig.add_subplot(gs[ 2])
# models
        for ids, ds in enumerate(ds_2D[-1:], start=len(ds_2D) - 1):  # only the last model, so labels stay consistent
mod , d_= extract_t_elev_2D(ds, xl, yl)
corr, ts2, ts1 = get_corr(mod, obs[obs.columns[0]])
mod95 = mod.quantile(0.95)
corr95, _, _ = get_corr(mod[mod > mod95], obs[obs.columns[0]])
mod99 = mod.quantile(0.99)
corr99, _, _ = get_corr(mod[mod > mod99], obs[obs.columns[0]])
ax_plot1.plot(mod.index,mod.values , label=f"model {version2D[ids]}", color = colors[i_s % len(colors)], alpha = alpha[ids])
ax_plot2.scatter(ts1.values, ts2.values, c= 'k', label=f"model {version2D[ids]}, dist={d_}, Cr={corr}", s=1, alpha = 0.3)
ax_plot2.set_xlabel('measured data')
ax_plot2.set_ylabel('modelled data')
pc1, pc2 = get_percentiles(ts1, ts2)
ax_plot2.axline([0,0],slope = 1, lw =1, linestyle = '--', color = 'k')
ax_plot2.scatter(pc1, pc2, color = colors[i_s % len(colors)], alpha = alpha[ids])
ax_plot2.plot(pc1, pc2, color = colors[i_s % len(colors)], alpha = alpha[ids])
# observations
ax_plot1.plot(obs.index, obs.values , label=f"{ioc_code}", color = 'k', linestyle = '--')
ax_plot1.set_xlim(tmin, tmax)
ax_plot1.legend()
ax_plot2.legend()
ax_plot1.grid(axis='both', color = 'grey')
ax_plot2.grid(axis='both', color = 'grey')
# map
ax_map.scatter(xl, yl, marker = "*", c = 'r')
_ = countries.plot(color='lightgrey', ax=ax_map, zorder=-1)
ax_map.set_xlim(xl-20, xl+20)
ax_map.set_ylim(yl-20, yl+20)
121it [07:34, 3.75s/it]
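A possible follow-up would be to gather the per-station metrics into a single skill table (a sketch built on the helpers above; skill_table is a hypothetical name):
# collect correlation and node distance for every screened station
rows = []
for i_s, ioc_code in enumerate(surge_stations.ioc_code):
    s = surge_stations.iloc[i_s]
    obs = pd.read_parquet(SURGE_FOLDER + f"{ioc_code}.parquet").loc[tmin:tmax]
    mod, d_ = extract_t_elev_2D(ds_2D[-1], s.longitude, s.latitude)
    corr, _, _ = get_corr(mod, obs[obs.columns[0]])
    rows.append({'ioc_code': ioc_code, 'corr': corr, 'dist_deg': float(d_)})
skill_table = pd.DataFrame(rows).sort_values('corr', ascending=False)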