Find IOC stations in STOFS2D/3D output file¶
In [1]:
import geopandas as gp
import pandas as pd
import searvey
import hvplot.pandas
import hvplot.xarray
import numpy as np
import sklearn.neighbors
get stofs stations
In [2]:
mycols = [str(i) for i in range(6)] # we expect 17 cols max in that file
stof2d = pd.read_csv(
"https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
names=mycols,
sep="\t+|!",
header=None,
skiprows=1
)
stof2d['Info'] = stof2d.apply(lambda row: ' '.join(filter(None, row[2:])), axis=1)
stof2d['ID'] = stof2d['Info'].apply(lambda x: ' '.join(x.split()[:3]))
stof2d['Info'] = stof2d.apply(lambda row: row['Info'].replace(row['ID'], '').strip(), axis=1)
stof2d = stof2d.drop(columns=["2", "3", "4", "5"])
stof2d.rename(columns={"0": 'lon', "1": 'lat'}, inplace=True)
stof2d
/tmp/ipykernel_555100/2319708481.py:2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'. stof2d = pd.read_csv(
Out[2]:
lon | lat | Info | ID | |
---|---|---|---|---|
0 | -66.981003 | 44.903000 | ME Eastport | PSBM1 SOUS41 8410140 |
1 | -67.204002 | 44.654499 | ME Cutler Farris Wharf | CFWM1 SOUS41 8411060 |
2 | -68.203499 | 44.394001 | ME Bar Harbor | ATGM1 SOUS41 8413320 |
3 | -70.246002 | 43.656101 | ME Portland | CASM1 SOUS41 8418150 |
4 | -70.563301 | 43.320000 | ME Wells | WELM1 SOUS41 8419317 |
... | ... | ... | ... | ... |
1683 | -178.580000 | 58.960000 | UJ850 SOUS00 SA850 | |
1684 | -144.570000 | 58.960000 | UJ851 SOUS00 SA851 | |
1685 | -68.020000 | 58.960000 | UJ852 SOUS00 SA852 | |
1686 | -51.010000 | 58.960000 | UJ853 SOUS00 SA853 | |
1687 | -25.500000 | 58.960000 | UJ854 SOUS00 SA854 |
1688 rows × 4 columns
get ioc stations
In [3]:
import os
def ioc_subset_from_files_in_folder(
df: pd.DataFrame, folder: str, ext: str = ".parquet"
):
"""this function return a subset of the ioc database from all the files (json or parquet)
present in a folder
"""
list_files = []
for file in os.listdir(folder):
name = file.split(ext)[0]
if file.endswith(ext):
list_files.append(name)
return df[df.ioc_code.isin(list_files)]
def get_meta() -> gp.GeoDataFrame:
meta_web = searvey.get_ioc_stations().drop(columns=["lon", "lat"])
meta_api = (
pd.read_json(
"http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
)
.drop_duplicates()
.drop(columns=["lon", "lat"])
.rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})
)
merged = pd.merge(
meta_web,
meta_api[["ioc_code", "lon", "lat"]].drop_duplicates(),
on=["ioc_code"],
)
return merged.drop(columns=["geometry"])
ioc_ = get_meta()
ioc_cleanup = ioc_subset_from_files_in_folder(ioc_, "/home/tomsail/Documents/work/python/seareport_org/skill-panel/01_obs/surge")
drop_index = ioc_cleanup.ioc_code.isin(['dapi', 'datu', 'djve', 'dkwa'])
ioc_cleanup = ioc_cleanup[~drop_index]
ioc_cleanup = ioc_cleanup.reset_index()
ioc_cleanup = ioc_cleanup.drop(columns=["index"])
In [4]:
def find_nearest_nodes(
mesh_nodes: pd.DataFrame,
points: pd.DataFrame,
metric: str = "haversine",
earth_radius = 6371000,
):
"""
Calculate the mesh nodes that are nearest to the specified `points`.
Both `mesh_nodes` and `points` must be `pandas.DataFrames` that have
columns named `lon` and `lat` and the coords must be in EPSG:4326.
Returns the `points` DataFrame after adding these extra columns:
- `mesh_index` which is the index of the node in the `hgrid.gr3` file
- `mesh_lon` which is the longitude of the nearest mesh node
- `mesh_lat` which is the latitude of the nearest mesh node
- `distance` which is the distance in meters between the point and the nearest mesh node
Examples:
>>> mesh_nodes = pd.DataFrame({
... "lon": [0, 10, 20],
... "lat": [0, 5, 0],
... })
>>> points = pd.DataFrame({
... "lon": [1, 11, 21],
... "lat": [1, 4, 1],
... "id": ["a", "b", "c"],
... })
>>> nearest_nodes = find_nearest_nodes(mesh_nodes, points)
>>> nearest_nodes
lon lat id mesh_index mesh_lon mesh_lat distance
0 1 1 a 0 0 0 157249.381272
1 11 4 b 1 10 5 157010.162641
2 21 1 c 2 20 0 157249.381272
"""
# The only requirement is that both `mesh_nodes and `points` have `lon/lat` columns
tree = sklearn.neighbors.BallTree(
np.radians(mesh_nodes[["lat", "lon"]]),
metric=metric,
)
distances, indices = tree.query(np.radians(points[["lat", "lon"]].values))
closest_nodes = (
mesh_nodes
.rename(columns={"lon": "mesh_lon", "lat": "mesh_lat"})
.iloc[indices.flatten()]
.assign(distance=(distances.flatten() * earth_radius))
.reset_index(names=["mesh_index"])
)
return pd.concat((points, closest_nodes), axis="columns")
In [6]:
nearest_nodes = find_nearest_nodes(stof2d[["lon", "lat"]], ioc_cleanup[["lon", "lat", "ioc_code" ]])
keep_nodes = nearest_nodes[nearest_nodes.distance < 5000]
keep_nodes
Out[6]:
lon | lat | ioc_code | mesh_index | mesh_lon | mesh_lat | distance | |
---|---|---|---|---|---|---|---|
1 | 131.410000 | 31.580000 | abur | 473 | 131.417000 | 31.567000 | 1590.385719 |
2 | -74.418300 | 39.355000 | acnj | 22 | -74.417000 | 39.349998 | 567.317630 |
3 | -122.298000 | 37.772000 | alam | 145 | -122.298302 | 37.771702 | 42.456977 |
4 | -64.920000 | 18.335000 | amal | 116 | -64.921307 | 18.335744 | 160.857875 |
6 | 16.134848 | 69.326067 | ande | 524 | 16.150000 | 69.317000 | 1170.659159 |
... | ... | ... | ... | ... | ... | ... | ... |
154 | 141.690000 | 45.410000 | waka | 477 | 141.683000 | 45.417000 | 951.001930 |
157 | -70.671700 | 41.523300 | wood | 8 | -70.671898 | 41.522800 | 57.989538 |
159 | -124.110000 | 46.908300 | wpwa | 170 | -124.105103 | 46.904301 | 579.762180 |
160 | -139.735000 | 59.548000 | yaku | 186 | -139.733398 | 59.548500 | 106.026795 |
161 | 39.183300 | -6.150000 | zanz | 388 | 39.190000 | -6.155000 | 926.156759 |
71 rows × 7 columns
In [11]:
plot1 = stof2d.hvplot.points(
x='lon', y='lat',
s = 50,
geo=True,
tiles = True,
hover_cols=["ID"],
label = 'STOFS stations'
)
plot2 = stof2d.iloc[keep_nodes.mesh_index].hvplot.points(
x='lon', y='lat',
s = 50,
geo=True,
tiles = True,
hover_cols=["ID"],
label = 'concomittent IOC/STOFS stations'
)
plot3 = ioc_cleanup.hvplot.points(
x='lon', y='lat',
s = 10,
geo=True,
tiles = True,
hover_cols=["ioc_code"],
label = 'IOC_cleanup'
)
(plot1 * plot2 * plot3).opts(
width=1400, height=800, title='STOFS2D/3D stations and IOC stations'
)
Out[11]: