Find IOC stations in STOFS2D/3D output file¶

In [1]:
import geopandas as gp
import pandas as pd
import searvey
import hvplot.pandas
import hvplot.xarray
import numpy as np
import sklearn.neighbors

get stofs stations

In [2]:
mycols = [str(i) for i in range(6)] # we expect 17 cols max in that file
stof2d = pd.read_csv(
    "https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
    names=mycols, 
    sep="\t+|!", 
    header=None, 
    skiprows=1
)
stof2d['Info'] = stof2d.apply(lambda row: ' '.join(filter(None, row[2:])), axis=1)
stof2d['ID'] = stof2d['Info'].apply(lambda x: ' '.join(x.split()[:3]))
stof2d['Info'] = stof2d.apply(lambda row: row['Info'].replace(row['ID'], '').strip(), axis=1)
stof2d = stof2d.drop(columns=["2", "3", "4", "5"])
stof2d.rename(columns={"0": 'lon', "1": 'lat'}, inplace=True)
stof2d
/tmp/ipykernel_555100/2319708481.py:2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.
  stof2d = pd.read_csv(
Out[2]:
lon lat Info ID
0 -66.981003 44.903000 ME Eastport PSBM1 SOUS41 8410140
1 -67.204002 44.654499 ME Cutler Farris Wharf CFWM1 SOUS41 8411060
2 -68.203499 44.394001 ME Bar Harbor ATGM1 SOUS41 8413320
3 -70.246002 43.656101 ME Portland CASM1 SOUS41 8418150
4 -70.563301 43.320000 ME Wells WELM1 SOUS41 8419317
... ... ... ... ...
1683 -178.580000 58.960000 UJ850 SOUS00 SA850
1684 -144.570000 58.960000 UJ851 SOUS00 SA851
1685 -68.020000 58.960000 UJ852 SOUS00 SA852
1686 -51.010000 58.960000 UJ853 SOUS00 SA853
1687 -25.500000 58.960000 UJ854 SOUS00 SA854

1688 rows × 4 columns

get ioc stations

In [3]:
import os

def ioc_subset_from_files_in_folder(
    df: pd.DataFrame, folder: str, ext: str = ".parquet"
):
    """this function return a subset of the ioc database from all the files (json or parquet)
    present in a folder
    """
    list_files = []
    for file in os.listdir(folder):
        name = file.split(ext)[0]
        if file.endswith(ext):
            list_files.append(name)
    return df[df.ioc_code.isin(list_files)]

def get_meta() -> gp.GeoDataFrame:
    meta_web = searvey.get_ioc_stations().drop(columns=["lon", "lat"])
    meta_api = (
        pd.read_json(
            "http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
        )
        .drop_duplicates()
        .drop(columns=["lon", "lat"])
        .rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})
    )
    merged = pd.merge(
        meta_web,
        meta_api[["ioc_code", "lon", "lat"]].drop_duplicates(),
        on=["ioc_code"],
    )
    return merged.drop(columns=["geometry"])

ioc_ = get_meta()
ioc_cleanup = ioc_subset_from_files_in_folder(ioc_, "/home/tomsail/Documents/work/python/seareport_org/skill-panel/01_obs/surge")
drop_index = ioc_cleanup.ioc_code.isin(['dapi', 'datu', 'djve', 'dkwa'])
ioc_cleanup = ioc_cleanup[~drop_index]
ioc_cleanup = ioc_cleanup.reset_index()
ioc_cleanup = ioc_cleanup.drop(columns=["index"])
In [4]:
def find_nearest_nodes(
    mesh_nodes: pd.DataFrame,
    points: pd.DataFrame,
    metric: str = "haversine",
    earth_radius = 6371000,
    ):
    """
    Calculate the mesh nodes that are nearest to the specified `points`.
    Both `mesh_nodes` and `points` must be `pandas.DataFrames` that have
    columns named `lon` and `lat` and the coords must be in EPSG:4326.
    Returns the `points` DataFrame after adding these extra columns:
    - `mesh_index` which is the index of the node in the `hgrid.gr3` file
    - `mesh_lon` which is the longitude of the nearest mesh node
    - `mesh_lat` which is the latitude of the nearest mesh node
    - `distance` which is the distance in meters between the point and the nearest mesh node
    Examples:
        >>> mesh_nodes = pd.DataFrame({
        ...     "lon": [0, 10, 20],
        ...     "lat": [0, 5, 0],
        ... })
        >>> points = pd.DataFrame({
        ...     "lon": [1, 11, 21],
        ...     "lat": [1, 4, 1],
        ...     "id": ["a", "b", "c"],
        ... })
        >>> nearest_nodes = find_nearest_nodes(mesh_nodes, points)
        >>> nearest_nodes
           lon  lat id  mesh_index  mesh_lon  mesh_lat       distance
        0    1    1  a           0         0         0  157249.381272
        1   11    4  b           1        10         5  157010.162641
        2   21    1  c           2        20         0  157249.381272
    """
    # The only requirement is that both `mesh_nodes and `points` have `lon/lat` columns
    tree = sklearn.neighbors.BallTree(
        np.radians(mesh_nodes[["lat", "lon"]]),
        metric=metric,
    )
    distances, indices = tree.query(np.radians(points[["lat", "lon"]].values))
    closest_nodes = (
        mesh_nodes
        .rename(columns={"lon": "mesh_lon", "lat": "mesh_lat"})
        .iloc[indices.flatten()]
        .assign(distance=(distances.flatten() * earth_radius))
        .reset_index(names=["mesh_index"])
    )
    return pd.concat((points, closest_nodes), axis="columns")
In [6]:
nearest_nodes = find_nearest_nodes(stof2d[["lon", "lat"]], ioc_cleanup[["lon", "lat", "ioc_code" ]])
keep_nodes = nearest_nodes[nearest_nodes.distance < 5000]
keep_nodes
Out[6]:
lon lat ioc_code mesh_index mesh_lon mesh_lat distance
1 131.410000 31.580000 abur 473 131.417000 31.567000 1590.385719
2 -74.418300 39.355000 acnj 22 -74.417000 39.349998 567.317630
3 -122.298000 37.772000 alam 145 -122.298302 37.771702 42.456977
4 -64.920000 18.335000 amal 116 -64.921307 18.335744 160.857875
6 16.134848 69.326067 ande 524 16.150000 69.317000 1170.659159
... ... ... ... ... ... ... ...
154 141.690000 45.410000 waka 477 141.683000 45.417000 951.001930
157 -70.671700 41.523300 wood 8 -70.671898 41.522800 57.989538
159 -124.110000 46.908300 wpwa 170 -124.105103 46.904301 579.762180
160 -139.735000 59.548000 yaku 186 -139.733398 59.548500 106.026795
161 39.183300 -6.150000 zanz 388 39.190000 -6.155000 926.156759

71 rows × 7 columns

In [11]:
plot1 = stof2d.hvplot.points(
    x='lon', y='lat', 
    s = 50,
    geo=True, 
    tiles = True, 
    hover_cols=["ID"], 
    label = 'STOFS stations'
)
plot2 = stof2d.iloc[keep_nodes.mesh_index].hvplot.points(
    x='lon', y='lat', 
    s = 50,
    geo=True, 
    tiles = True, 
    hover_cols=["ID"], 
    label = 'concomittent IOC/STOFS stations'
)
plot3 = ioc_cleanup.hvplot.points(
    x='lon', y='lat', 
    s = 10,
    geo=True, 
    tiles = True, 
    hover_cols=["ioc_code"], 
    label = 'IOC_cleanup'
)
(plot1 * plot2 * plot3).opts(
    width=1400, height=800, title='STOFS2D/3D stations and IOC stations'
)
Out[11]: