This notebook contains the methodology for:
- choosing the best IOC candidates for storm surge validation purposes
- extracting STOFS2D data

This notebook will be edited over time to account for:
- the data availability
- the data quality of the IOC candidates
import geopandas as gp
import pandas as pd
import searvey
from datetime import datetime
import numpy as np
import sklearn.neighbors
import xarray as xr
import hvplot.pandas
import os
We will extract modelled data from the STOFS2D model, focusing on the station (point) locations exported by the model.
The files have the following naming convention: stofs_2d_glo.tCCz.points.{cwl,htp,swl}.nc
described as "Six-minute station time series water level (m, MSL) forecast files at verification sites", where:
- swl is the storm surge only
- htp is the astronomical tide only
- cwl is the combined water level (swl + htp)

More information can be found on this page: https://noaa-gestofs-pds.s3.amazonaws.com/README.html
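As a quick sanity check, the three variables can be compared directly. A minimal sketch, assuming local copies of the three point files for one forecast cycle and assuming the water-level variable is named `zeta` (both are assumptions, not confirmed by this notebook):
import numpy as np
import xarray as xr
# hypothetical local copies of the swl/htp/cwl files for the same cycle
swl = xr.open_dataset("stofs2d/20231010_stofs_2d_glo.t18z.points.swl.nc")
htp = xr.open_dataset("stofs2d/20231010_stofs_2d_glo.t18z.points.htp.nc")
cwl = xr.open_dataset("stofs2d/20231010_stofs_2d_glo.t18z.points.cwl.nc")
# cwl should be (approximately) the sum of surge and tide
print(np.allclose(cwl.zeta, swl.zeta + htp.zeta, atol=1e-3, equal_nan=True))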
# A file available at the following address references all STOFS 2D stations:
def get_stofs():
    mycols = [str(i) for i in range(6)]  # placeholder names for the fields we parse
    stof2d = pd.read_csv(
        "https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
        names=mycols,
        sep="\t+|!",
        header=None,
        skiprows=1,
        engine="python",  # the regex separator requires the python engine
    )
    stof2d['Info'] = stof2d.apply(lambda row: ' '.join(filter(None, row[2:])), axis=1)
    stof2d['ID'] = stof2d['Info'].apply(lambda x: ' '.join(x.split()[:3]))
    stof2d['Info'] = stof2d.apply(lambda row: row['Info'].replace(row['ID'], '').strip(), axis=1)
    stof2d = stof2d.drop(columns=["2", "3", "4", "5"])
    stof2d.rename(columns={"0": 'lon', "1": 'lat'}, inplace=True)
    return stof2d
stofs = get_stofs()
stofs
 | lon | lat | Info | ID
---|---|---|---|---|
0 | -66.981003 | 44.903000 | ME Eastport | PSBM1 SOUS41 8410140 |
1 | -67.204002 | 44.654499 | ME Cutler Farris Wharf | CFWM1 SOUS41 8411060 |
2 | -68.203499 | 44.394001 | ME Bar Harbor | ATGM1 SOUS41 8413320 |
3 | -70.246002 | 43.656101 | ME Portland | CASM1 SOUS41 8418150 |
4 | -70.563301 | 43.320000 | ME Wells | WELM1 SOUS41 8419317 |
... | ... | ... | ... | ... |
1683 | -178.580000 | 58.960000 | UJ850 SOUS00 SA850 | |
1684 | -144.570000 | 58.960000 | UJ851 SOUS00 SA851 | |
1685 | -68.020000 | 58.960000 | UJ852 SOUS00 SA852 | |
1686 | -51.010000 | 58.960000 | UJ853 SOUS00 SA853 | |
1687 | -25.500000 | 58.960000 | UJ854 SOUS00 SA854 |
1688 rows × 4 columns
A caveat is that the station (points) output files evolve over time:
stofs1 = xr.open_dataset("stofs2d/20220912_stofs_2d_glo.t18z.points.swl.nc")
stofs2 = xr.open_dataset("stofs2d/20231010_stofs_2d_glo.t18z.points.swl.nc")
stofs3 = xr.open_dataset("stofs2d/20241229_stofs_2d_glo.t12z.points.swl.nc")
stofs_2022 = stofs[stofs.ID.isin([' '.join(s.decode("utf-8").strip().split()[:3]) for s in stofs1.station_name.values])];len(stofs_2022)
stofs_2023 = stofs[stofs.ID.isin([' '.join(s.decode("utf-8").strip().split()[:3]) for s in stofs2.station_name.values])];len(stofs_2023)
stofs_2024 = stofs[stofs.ID.isin([' '.join(s.decode("utf-8").strip().split()[:3]) for s in stofs3.station_name.values])];len(stofs_2024)
562
1687
1688
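The ID reconstruction above is repeated for each file; a small helper (not part of the original notebook) that wraps it:
def stofs_ids(ds: xr.Dataset) -> list:
    """Decode the byte-string station names and keep the first three tokens as the ID."""
    return [" ".join(s.decode("utf-8").strip().split()[:3]) for s in ds.station_name.values]
# e.g. stofs_2023 = stofs[stofs.ID.isin(stofs_ids(stofs2))]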
Luckily, the new stations were appended at the end of the file, so concatenating data across all the files will be easier:
stofs_2022[:557].equals(stofs_2023[:557])
stofs_2022[:557].equals(stofs_2024[:557])
True
True
We need to compare the modelled storm surge with observations. We use the IOC tide gauge stations:
def get_meta() -> pd.DataFrame:
    meta_web = searvey.get_ioc_stations().drop(columns=["lon", "lat"])
    meta_api = (
        pd.read_json(
            "http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
        )
        .drop_duplicates()
        .drop(columns=["lon", "lat"])
        .rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})
    )
    merged = pd.merge(
        meta_web,
        meta_api[["ioc_code", "lon", "lat"]].drop_duplicates(),
        on=["ioc_code"],
    )
    return merged.drop(columns=["geometry"])
ioc_ = get_meta()
We have already established a database of clean IOC data between 2022 and 2023; we'll use it as a reference:
IOC_CLEANUP = "ioc_cleanup_2023.csv"
ioc_cleanup = pd.read_csv(IOC_CLEANUP, index_col=0).rename(columns={"longitude": 'lon', "latitude": 'lat', "Station_Name":"location","Country":"country"})
stofs_plot = stofs_2022.hvplot.scatter(x= "lon", y="lat", hover_cols = "ID", s=130, c='lightgrey', label = 'STOFS 2022 output stations')
stofs_plot1 = stofs_2023.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=150, c='grey', label = 'STOFS 2023 output stations')
stofs_plot2 = stofs_2024.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=200, c='k', label = 'STOFS 2024 output stations')
ioc_plot = ioc_.hvplot.scatter(x="lon", y="lat",hover_cols = "ioc_code", s= 30 , c = 'y', label = 'all IOC stations')
ioc_cleanup_plot = ioc_cleanup.hvplot.scatter(x="lon", y="lat",s = 80, c='r', label = "stations cleaned for 2022-2023")
(stofs_plot2 * stofs_plot1 * stofs_plot * ioc_cleanup_plot* ioc_plot).opts(width = 1600, height = 800)
We graphically identified all stations that are not already in ioc_cleanup and that coincide with STOFS2D output locations:
station_to_add = ["juan", "sanf", "anto", "ptmo", "valp", "ferg", "ambon", "bitu", "saum", "sho2", "ushu",
"espr", "gamb", "riki", "prud", "vald", "cord", "paak", "dsea", "ketc", "june", "skag", "sewa", "anch", "niki", "seld", "kodi", "alak",
"dshu", "dkod", "nome", "adak", "niko", "dchu", "midx", "fren", "sthl", "ascen", "jask", "chab", "kara", "musc",
"masi", "mais", "kerg", "syow", "ver1", "vern", "wait", "stpa", "sala", "tara", "marsh", "kwaj", "wake", "fong",
"solo", "vanu", "numbo", "numb2", "levu", "wlgt", "jack", "hako", "abas", "ofun", "mera", "toya", "nawi", "brpt", "heeia",
"moku", "mane", "john", "plmy", "xmas", "penr", "hiva", "pape", "raro", "pago", "pagx", "east", "garc", "Male2", "ganm", "male", "hani",
"mini", "coch", "vish", "chtt", "sitt", "moul", "ptbl", "komi", "kota", "lank", "ms001", "sab2", "saba", "vung", "quin",
"quar", "curri", "subi", "mani", "luba", "lega", "tkao", "tkee", "chij", "mins", "saip", "mala", "chuu", "kapi", "deke", "naur", "nauu",
"dumo", "espe", "porl", "hill", "waik", "lemba", "beno", "prgi", "prig", "cili", "cila", "tjls", "chrs", "ffcj", "cocb", "telu", "sibo",
"sib2", "tanjo", "bupo", "padn", "pada", "fpga", "winc", "wbnc", "oinc", "kpva", "leva", "simd", "wsdc", "cbmd", "ocmd", "cmnj", "phap",
"mhpa", "btny", "shnj", "mony", "ptme", "cwme", "epme", "hali", "nain", "nuk1", "nuuk", "qaqo", "reyk", "scor", "rptx", "cctx", "pitx",
"pric", "ftfr", "rose", "barb", "stcr", "lame", "isab", "vieq", "yobu", "yabu", "faja", "sanj", "arac", "maya", "magi", "penu", "mona",
"ptpr", "ptpl", "sama", "bull", "elpo", "limon", "quepo", "sana", "acaj", "acap", "acya", "manz", "mnza", "cabo", "fort", "call", "lobos",
"tala", "lali", "vkfl", "nafl", "fmfl", "spfl", "pnfl", "pbfl", "apfl", "tpfl", "fbfl", "moal", "wlms", "psla", "gila", "pfla", "ncla",
"apla", "eila", "cpla", "sptx", "gptx", "fptx", "bres", "sthm", "casc", "gibr", "ceut", "mars", "TR22", "gvd9", "alex", "palm", "pdas",
"plus", "dakar", "tako", "tkdi", "lagos", "pntn", "sitin", "walvi", "prte", "durb", "pemba", "mtwa", "momb", "lamu", "pmon", "aric", "mata",
"plat", "salv"]
Some stations can appear under several variant names:
possible_stations = []
all_ioc = ioc_.ioc_code.values
for stat in station_to_add:
    if any(stat in station for station in all_ioc):
        for station in all_ioc:
            if stat in station:
                possible_stations.append(station)
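For reference, the same selection can be written with vectorized pandas string matching; an equivalent sketch (the regex pattern here is purely illustrative):
import re
pattern = "|".join(map(re.escape, station_to_add))
possible_stations = ioc_.loc[ioc_.ioc_code.str.contains(pattern, na=False), "ioc_code"].tolist()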
ioc_to_add = ioc_[ioc_.ioc_code.isin(possible_stations)]
ioc_to_add
 | ioc_code | gloss_id | country | location | connection | dcp_id | last_observation_level | last_observation_time | delay | interval | ... | observations_ratio_per_week | observations_arrived_per_month | observations_expected_per_month | observations_ratio_per_month | observations_ratio_per_day | sample_interval | average_delay_per_day | transmit_interval | lon | lat
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | abas | 327 | Japan | Abashiri | SWJP40 | ABASHIRI | 1.69 | 09:39 | 15' | 10' | ... | 100 | 44580 | 44640.0 | 100 | 100 | 1' | 7' | 10' | 144.290000 | 44.020000 |
3 | acaj | 182 | El Salvador | Acajutla SV | SZXX01 | 300434064008810 | 0.94 | 09:49 | 5' | 5' | ... | 93 | 43295 | 44640.0 | 97 | 99 | 1' | 5' | 5' | -89.838128 | 13.573792 |
4 | acap | 267 | Mexico | Acapulco MX | SEPA40 | 3540E15A | 8.26 | -down- | 2744d | 5' | ... | 0 | -down- | NaN | 0 | 0 | 1' | NaN | 5' | -99.916600 | 16.833300 |
5 | acap2 | 267 | Mexico | Acapulco API | SOMX10 | 0100D7CA | 4.65 | 09:46 | 8' | 10' | ... | 100 | 44535 | 44640.0 | 100 | 100 | 1' | 7' | 10' | -99.903000 | 16.837933 |
9 | acya | 267 | Mexico | Acapulco Club de Yates | ftp | NaN | 1.53 | 2025-01-08 14:43 | 2d | 10' | ... | 0 | 650 | 44640.0 | 1 | 0 | 1' | NaN | 10' | -99.902980 | 16.837990 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1615 | wlms2 | <NA> | USA | Bay Waveland Yacht Club, MS | SXXX03 | 33456542 | 16.02 | -down- | 675d | 6' | ... | 0 | -down- | NaN | 0 | 0 | 1' | NaN | 6' | -89.325800 | 30.326400 |
1621 | wsdc | <NA> | USA | Washington DC | SXXX50 | 335861BA | NaN | NaN | NaN | 6' | ... | 0 | NaN | NaN | 0 | 0 | ' | NaN | 6' | -77.020000 | 38.870000 |
1624 | xmas | 146 | Kiribati | Christmas KI | SZXX01 | 301434060001890 | 1.30 | 09:50 | 4' | 5' | ... | 93 | 43291 | 44640.0 | 97 | 98 | 1' | 5' | 5' | -157.473000 | 1.984000 |
1625 | yabu | <NA> | USA | Yabucoa Harbor, PR | SXXX03 | 3366B5CA | -6.28 | 09:50 | 4' | 6' | ... | 97 | 43620 | 44640.0 | 98 | 98 | 1' | 5' | 6' | -65.833000 | 18.055100 |
1630 | yobu | <NA> | Puerto Rico | Yobucou PR | SXXX03 | 3366B5CA | NaN | NaN | NaN | 6' | ... | 0 | NaN | NaN | 0 | 0 | ' | NaN | 6' | -65.833000 | 18.050100 |
334 rows × 24 columns
stofs_plot = stofs_2022.hvplot.scatter(x= "lon", y="lat", hover_cols = "ID", s=130, c='lightgrey', label = 'STOFS 2022 output stations')
stofs_plot1 = stofs_2023.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=150, c='grey', label = 'STOFS 2023 output stations')
stofs_plot2 = stofs_2024.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=200, c='k', label = 'STOFS 2024 output stations')
ioc_plot = ioc_.hvplot.scatter(x="lon", y="lat",hover_cols = "ioc_code", s= 30 , c = 'y',label = 'all IOC stations')
ioc_cleanup_plot = ioc_cleanup.hvplot.scatter(x="lon", y="lat",s = 90, c='r',label = 'stations already cleaned for 2022-2023')
ioc_to_add_plot = ioc_to_add.hvplot.scatter(x="lon", y="lat", s=90, c='g', label='stations to be added')
(stofs_plot2 * stofs_plot1 * stofs_plot * ioc_to_add_plot * ioc_cleanup_plot).opts(width = 1800, height = 800)
The 2024 IOC cleanup database is the union of the red and green points:
ioc_cleanup_2024 = pd.concat([ioc_cleanup,ioc_to_add])
ioc_cleanup_2024
ioc_cleanup_2024.to_csv("ioc_cleanup_2024.csv")
 | location | ioc_code | gloss_id | lat | lon | country | connection | contacts | dcp_id | last_observation_level | ... | number_of_years | time_zone_hours | datum_information | instrument | precision | null_value | gauge_type | overall_record_quality | gesla3_id | seaset_id
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
125 | Base O'Higgins | ohig | <NA> | -63.3210 | -57.9010 | Antarctica | SXCH40 | Servicio Hidrográfico y Oceanográfico de la Ar... | ADC04BE6 | 1.75 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 125.0 |
135 | Puerto Deseado | dese | 190.0 | -47.7540 | -65.9150 | Argentina | SEPO40 | Armada Argentina Servicio de Hidrografía Naval... | 33912088 | 3.69 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 135.0 |
136 | Puerto Madryn | madry | 191.0 | -42.7630 | -65.0310 | Argentina | SEPO40 | Armada Argentina Servicio de Hidrografía Naval... | 335665D2 | 6.60 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 136.0 |
139 | Battery Point | bapj | <NA> | -42.8920 | 147.3380 | Australia | SZAU01 | National Tidal Centre/Australian Bureau of Met... | 61221 | 0.91 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 139.0 |
140 | Broome | brom | 40.0 | -18.0010 | 122.2190 | Australia | SZAU01 | National Tidal Centre/Australian Bureau of Met... | 62650 | 7.69 | ... | 32.0 | 0.0 | Unspecified | Unspecified | Unspecified | -99.9999 | Coastal | No obvious issues | Broome | 140.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1615 | Bay Waveland Yacht Club, MS | wlms2 | <NA> | 30.3264 | -89.3258 | USA | SXXX03 | National Ocean Service-NOAA ( USA ) | 33456542 | 16.02 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1621 | Washington DC | wsdc | <NA> | 38.8700 | -77.0200 | USA | SXXX50 | National Ocean Service-NOAA ( USA ) | 335861BA | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1624 | Christmas KI | xmas | 146.0 | 1.9840 | -157.4730 | Kiribati | SZXX01 | Kiribati Met Office ( Kiribati ) +University o... | 301434060001890 | 1.30 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1625 | Yabucoa Harbor, PR | yabu | <NA> | 18.0551 | -65.8330 | USA | SXXX03 | Puerto Rico Seismic Network ( USA ) +National ... | 3366B5CA | -6.28 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1630 | Yobucou PR | yobu | <NA> | 18.0501 | -65.8330 | Puerto Rico | SXXX03 | NaN | 3366B5CA | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
503 rows × 77 columns
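Since the cleaned 2022-2023 stations and the new additions could in principle overlap, a small safety check (an optional addition, not part of the original workflow) can flag duplicated codes before the file is used downstream:
# flag IOC codes that appear more than once in the concatenated table
dupes = ioc_cleanup_2024[ioc_cleanup_2024.ioc_code.duplicated(keep=False)]
if not dupes.empty:
    print("duplicated IOC codes:", sorted(dupes.ioc_code.unique()))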
def find_nearest_nodes(
    mesh_nodes: pd.DataFrame,
    points: pd.DataFrame,
    metric: str = "haversine",
    earth_radius = 6371000,
):
    """
    Calculate the mesh nodes that are nearest to the specified `points`.

    Both `mesh_nodes` and `points` must be `pandas.DataFrames` that have
    columns named `lon` and `lat` and the coords must be in EPSG:4326.

    Returns the `points` DataFrame after adding these extra columns:

    - `mesh_index` which is the index of the node in the `hgrid.gr3` file
    - `mesh_lon` which is the longitude of the nearest mesh node
    - `mesh_lat` which is the latitude of the nearest mesh node
    - `distance` which is the distance in meters between the point and the nearest mesh node

    Examples:
        >>> mesh_nodes = pd.DataFrame({
        ...     "lon": [0, 10, 20],
        ...     "lat": [0, 5, 0],
        ... })
        >>> points = pd.DataFrame({
        ...     "lon": [1, 11, 21],
        ...     "lat": [1, 4, 1],
        ...     "id": ["a", "b", "c"],
        ... })
        >>> nearest_nodes = find_nearest_nodes(mesh_nodes, points)
        >>> nearest_nodes
           lon  lat id  mesh_index  mesh_lon  mesh_lat       distance
        0    1    1  a           0         0         0  157249.381272
        1   11    4  b           1        10         5  157010.162641
        2   21    1  c           2        20         0  157249.381272
    """
    # The only requirement is that both `mesh_nodes` and `points` have `lon/lat` columns
    tree = sklearn.neighbors.BallTree(
        np.radians(mesh_nodes[["lat", "lon"]]),
        metric=metric,
    )
    distances, indices = tree.query(np.radians(points[["lat", "lon"]].values))
    closest_nodes = (
        mesh_nodes
        .rename(columns={"lon": "mesh_lon", "lat": "mesh_lat"})
        .iloc[indices.flatten()]
        .assign(distance=(distances.flatten() * earth_radius))
        .reset_index(names=["mesh_index"])
    )
    return pd.concat((points.reset_index(drop=True), closest_nodes), axis="columns")
# 2 - find the nearest STOFS2D output point for each IOC station
nearest_nodes_2022 = find_nearest_nodes(stofs_2022, ioc_cleanup_2024[["lon","lat","ioc_code","location"]])
nearest_nodes_2023 = find_nearest_nodes(stofs_2023, ioc_cleanup_2024[["lon","lat","ioc_code","location"]])
nearest_nodes_2024 = find_nearest_nodes(stofs_2024, ioc_cleanup_2024[["lon","lat","ioc_code","location"]])
nearest_nodes_2022 = nearest_nodes_2022[~nearest_nodes_2022.mesh_index.isna()]
nearest_nodes_2023 = nearest_nodes_2023[~nearest_nodes_2023.mesh_index.isna()]
nearest_nodes_2024 = nearest_nodes_2024[~nearest_nodes_2024.mesh_index.isna()]
keep_nodes_2022 = nearest_nodes_2022[nearest_nodes_2022.distance < 5000]
keep_nodes_2023 = nearest_nodes_2023[nearest_nodes_2023.distance < 5000]
keep_nodes_2024 = nearest_nodes_2024[nearest_nodes_2024.distance < 5000]
keep_nodes_2022.to_csv("keep_nodes_2022.csv")
keep_nodes_2023.to_csv("keep_nodes_2023.csv")
keep_nodes_2024.to_csv("keep_nodes_2024.csv")
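A quick summary of how many stations survive the 5 km threshold for each station list (a convenience check, not in the original notebook):
for year, kept in (("2022", keep_nodes_2022), ("2023", keep_nodes_2023), ("2024", keep_nodes_2024)):
    print(f"{year}: {len(kept)} IOC stations matched to a STOFS2D point within 5 km")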
The red points are all the STOFS2D stations to be extracted:
p2 = stofs_2022.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=50, c='grey', label = 'STOFS 2022 output stations')
ip = ioc_cleanup_2024.hvplot.scatter(x="lon", y="lat",s = 10, c='g',label = 'IOC_CLEANUP 2022-2024')
k2 = keep_nodes_2022.hvplot.scatter(x="lon", y="lat", c = 'red', s = 10, label = "STOFS2D stations to be extracted")
(p2 * ip * k2).opts(width = 1800, height = 800)
Download the STOFS2D files:
import pathlib
import pandas as pd
import httpx
import multifutures
base_path = "stofs2d"
pathlib.Path(base_path).mkdir(exist_ok=True)
# The URL changed at 2023-01-08
# These are the new urls
base_url = "https://noaa-nos-stofs2d-pds.s3.amazonaws.com/stofs_2d_glo.{date_str}/stofs_2d_glo.t{time}z.points.swl.nc"
new_url_names = {}
for date in pd.date_range("2023-01-08", "2024-12-31"):
    date_str = date.strftime("%Y%m%d")
    for time in ("00", "06", "12", "18"):
        url = base_url.format(date_str=date_str, time=time)
        name = f"{date_str}_{url.rsplit('/', 1)[1]}"
        new_url_names[url] = name
# These are the old urls
base_url = "https://noaa-nos-stofs2d-pds.s3.amazonaws.com/estofs.{date_str}/estofs.t{time}z.points.swl.nc"
old_url_names = {}
for date in pd.date_range("2022-01-01", "2023-01-07"):
    date_str = date.strftime("%Y%m%d")
    for time in ("00", "06", "12", "18"):
        url = base_url.format(date_str=date_str, time=time)
        name = f"{date_str}_{url.rsplit('/', 1)[1].replace('estofs', 'stofs_2d_glo')}"
        old_url_names[url] = name
def download_file(client, url, name):
    with open(name, "wb") as fd:
        with client.stream("GET", url) as response:
            for data in response.iter_bytes():
                fd.write(data)
with httpx.Client() as client:
    func_kwargs = [{"client": client, "url": url, "name": f"{base_path}/{name}"} for (url, name) in new_url_names.items()]
    multifutures.multithread(func=download_file, func_kwargs=func_kwargs, check=True)
with httpx.Client() as client:
    func_kwargs = [{"client": client, "url": url, "name": f"{base_path}/{name}"} for (url, name) in old_url_names.items()]
    multifutures.multithread(func=download_file, func_kwargs=func_kwargs, check=True)
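The helper above downloads every file unconditionally; a hedged variant (an illustration only, not the original helper) skips files already on disk and reports non-200 responses, which is convenient when re-running the notebook:
import pathlib
import httpx

def download_file_if_missing(client: httpx.Client, url: str, name: str) -> None:
    # skip files that are already on disk so the loop can be re-run incrementally
    if pathlib.Path(name).exists():
        return
    with client.stream("GET", url) as response:
        if response.status_code != 200:
            # some cycles may be missing on S3
            print(f"skipping {url}: HTTP {response.status_code}")
            return
        with open(name, "wb") as fd:
            for data in response.iter_bytes():
                fd.write(data)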
STOFS_FOLDER = "stofs2d/"
Extract the station time series:
import glob
# gather STOFS2D data
ds1 = []
ds2 = []
ds3 = []
for file in sorted(glob.glob(STOFS_FOLDER + "*.swl.nc")):
    root, filename = os.path.split(file)
    date = datetime.strptime(filename, "%Y%m%d_stofs_2d_glo.t%Hz.points.swl.nc")
    tmp = xr.open_dataset(file)
    # keep only the first 6 hours of each forecast cycle
    if date > datetime(2024, 5, 14, 11):
        ds3.append(tmp.sel(time=slice(date, date + pd.Timedelta(hours=6))))
        print(date, len(tmp.station_name), "ds3")
    elif date > datetime(2023, 1, 7, 23):
        ds2.append(tmp.sel(time=slice(date, date + pd.Timedelta(hours=6))))
        print(date, len(tmp.station_name), "ds2")
    else:  # dates up to 2023-01-07
        ds1.append(tmp.sel(time=slice(date, date + pd.Timedelta(hours=6))))
        print(date, len(tmp.station_name), "ds1")
ds1 = xr.concat(ds1, dim="time")
ds2 = xr.concat(ds2, dim="time")
ds3 = xr.concat(ds3, dim="time")
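As a sanity check (a sketch, using the `station` dimension that is indexed below), the three concatenated datasets should reflect the station counts seen earlier:
print(ds1.sizes["station"], ds2.sizes["station"], ds3.sizes["station"])  # expected 562, 1687, 1688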
os.makedirs("data", exist_ok=True)  # make sure the output folder exists
for it, station in keep_nodes_2022.iterrows():  # take 2022 because it has fewer nodes
    print(f'Station {station.ioc_code} at {station.lon:.2f}, {station.lat:.2f}')
    ds1_subset = ds1.isel(station=int(station.mesh_index))
    ds2_subset = ds2.isel(station=int(station.mesh_index))
    ds3_subset = ds3.isel(station=int(station.mesh_index))
    ds_subset = xr.concat([ds1_subset, ds2_subset, ds3_subset], dim="time")
    df = ds_subset.to_pandas()
    df.to_parquet(f'data/{station.ioc_code}.parquet')
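To verify the extraction, one of the saved series can be read back and plotted; a minimal sketch (the station code is just one example from the list above, and the water-level column is assumed to be named `zeta`):
import pandas as pd
import hvplot.pandas

check = pd.read_parquet("data/abas.parquet")
print(check.index.min(), check.index.max(), len(check))
check["zeta"].hvplot(title="STOFS2D storm surge at station 'abas'")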