In [ ]:
import geopandas as gp
import pandas as pd
import searvey
import hvplot.pandas
In [ ]:
def get_meta() -> gp.GeoDataFrame:
    meta_web = searvey.get_ioc_stations().drop(columns=["lon", "lat"])
    meta_api = (
        pd.read_json(
            "http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
        )
        .drop_duplicates()
        .drop(columns=["lon", "lat"])
        .rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})
    )
    merged = pd.merge(
        meta_web,
        meta_api[["ioc_code", "lon", "lat"]].drop_duplicates(),
        on=["ioc_code"],
    )
    return merged.drop(columns=["geometry"])

ioc_ = get_meta()
ioc_["is_ioc"] = True
ioc_['is_satellite'] = False
ioc_
Out[ ]:
ioc_code gloss_id country location connection dcp_id last_observation_level last_observation_time delay interval ... observations_expected_per_month observations_ratio_per_month observations_ratio_per_day sample_interval average_delay_per_day transmit_interval lon lat is_ioc is_satellite
0 abas 327 Japan Abashiri SWJP40 ABASHIRI 2.12 06:29 17' 10' ... 44640.0 100 99 1' 7' 10' 144.290000 44.020000 True False
1 abed <NA> UK Aberdeen ftp NaN 3.36 2024-08-07 13:15 18h 15' ... 2976.0 100 0 15' NaN 15' -2.080000 57.140000 True False
2 abur 82 Japan Aburatsu SWJP40 ABURATSU 1.67 06:29 17' 10' ... 44640.0 100 99 1' 7' 10' 131.410000 31.580000 True False
3 acaj 182 El Salvador Acajutla SV SZXX01 300434064008810 0.95 06:39 7' 5' ... 44640.0 89 99 1' 5' 5' -89.838128 13.573792 True False
4 acap 267 Mexico Acapulco MX SEPA40 3540E15A 8.26 -down- 2589d 5' ... NaN 0 0 1' NaN 5' -99.916600 16.833300 True False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1630 zkth <NA> Greece Zakynthos, Ionian Islands bgan GR-ZKTH-00 0.01 06:43 3' 1' ... 44640.0 96 99 1' 3' 1' 20.905200 37.781420 True False
1631 zldw <NA> Belgium Zeebrugge Leopold II dam web NaN 2.19 06:35 11' 5' ... 8928.0 100 56 5' 7' 5' 3.200278 51.346389 True False
1632 zwdw <NA> Belgium Zeebrugge Wielingendok web NaN 2.21 06:35 11' 5' ... 8928.0 100 56 5' 7' 5' 3.178056 51.355278 True False
1633 zygi <NA> Cyprus Zygi ftp NaN 1.91 -down- 3338d 1' ... NaN 0 0 0.5' NaN 1' 33.338375 34.727083 True False
1634 zygi1 <NA> Cyprus Zygi bgan ZYGI1 NaN 2022-03-18 00:47 874d 1' ... NaN 0 0 1' NaN 1' 33.340228 34.726315 True False

1635 rows × 26 columns

In [ ]:
mycols = [str(i) for i in range(6)] # we expect 17 cols max in that file
stof2d = pd.read_csv(
    "https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
    names=mycols, 
    sep="\t+|!", 
    header=None, 
    skiprows=1
)
stof2d['Info'] = stof2d.apply(lambda row: ' '.join(filter(None, row[2:])), axis=1)
stof2d['ID'] = stof2d['Info'].apply(lambda x: ' '.join(x.split()[:3]))
stof2d['Info'] = stof2d.apply(lambda row: row['Info'].replace(row['ID'], '').strip(), axis=1)
stof2d = stof2d.drop(columns=["2", "3", "4", "5"])
stof2d.rename(columns={"0": 'lon', "1": 'lat'}, inplace=True)
stof2d['is_satellite'] = stof2d.apply(lambda row: "SA" in row["ID"], axis=1)
stof2d['is_ioc'] = False
stof2d
/tmp/ipykernel_26336/2272989170.py:2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.
  stof2d = pd.read_csv(
Out[ ]:
lon lat Info ID is_satellite is_ioc
0 -66.981003 44.903000 ME Eastport PSBM1 SOUS41 8410140 False False
1 -67.204002 44.654499 ME Cutler Farris Wharf CFWM1 SOUS41 8411060 False False
2 -68.203499 44.394001 ME Bar Harbor ATGM1 SOUS41 8413320 False False
3 -70.246002 43.656101 ME Portland CASM1 SOUS41 8418150 False False
4 -70.563301 43.320000 ME Wells WELM1 SOUS41 8419317 False False
... ... ... ... ... ... ...
1683 -178.580000 58.960000 UJ850 SOUS00 SA850 True False
1684 -144.570000 58.960000 UJ851 SOUS00 SA851 True False
1685 -68.020000 58.960000 UJ852 SOUS00 SA852 True False
1686 -51.010000 58.960000 UJ853 SOUS00 SA853 True False
1687 -25.500000 58.960000 UJ854 SOUS00 SA854 True False

1688 rows × 6 columns

In [ ]:
# Add a temporary key column to each dataframe to ensure uniqueness
stof2d['_tmpkey'] = range(len(stof2d))
ioc_['_tmpkey'] = range(len(ioc_))

# Perform the outer merge on both the 'lon', 'lat', and the temporary key column
m = pd.merge(stof2d, ioc_, on=['lon', 'lat', '_tmpkey'], how='outer')

# Now drop the temporary key columns
m.drop('_tmpkey', axis=1, inplace=True)
In [ ]:
m
Out[ ]:
lon lat Info ID is_satellite_x is_ioc_x ioc_code gloss_id country location ... observations_ratio_per_week observations_arrived_per_month observations_expected_per_month observations_ratio_per_month observations_ratio_per_day sample_interval average_delay_per_day transmit_interval is_ioc_y is_satellite_y
0 -179.7978 -38.2001 NaN NaN NaN NaN dnzc <NA> New Zealand DART New Zealand E Coast ... 82.0 2952 2976.0 99.0 100.0 15' 30' 6h True False
1 -179.7978 -38.2001 NaN NaN NaN NaN dnzc2 <NA> New Zealand DART New Zealand E Coast ... 82.0 2928 2976.0 98.0 100.0 15' 209' 6h True False
2 -178.5800 -51.4200 UJ040 SOUS00 SA040 True False NaN <NA> NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 -178.5800 -29.7200 UJ211 SOUS00 SA211 True False NaN <NA> NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 -178.5800 -1.9800 UJ466 SOUS00 SA466 True False NaN <NA> NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3318 178.6037 -36.0999 NaN NaN NaN NaN dnzd2 <NA> New Zealand DART New Zealand ... 100.0 2968 2976.0 100.0 100.0 15' 197' 6h True False
3319 179.0962 -40.5992 NaN NaN NaN NaN dnzb <NA> New Zealand DART New Zealand E Coast ... 100.0 2976 2976.0 100.0 100.0 15' 30' 6h True False
3320 179.0962 -40.5992 NaN NaN NaN NaN dnzb2 <NA> New Zealand DART New Zealand E Coast ... 0.0 NaN NaN 0.0 0.0 15' NaN 6h True False
3321 179.1949 -8.5033 NaN NaN NaN NaN fong 121 Tuvalu Islands Fongafale TV ... 100.0 44613 44640.0 100.0 100.0 1' 2' 3' True False
3322 179.1950 -8.5250 Funafuti Tuvalu UH025 SOUS00 GL121 False False NaN <NA> NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

3323 rows × 30 columns

In [ ]:
m['is_ioc'] = m['is_ioc_x'].fillna(m['is_ioc_y'])
m['is_satellite'] = m['is_satellite_x'].fillna(m['is_satellite_y'])
m["id"] = m["ID"].fillna(m["ioc_code"])
m = m.drop(columns=['is_ioc_x', 'is_ioc_y', 'is_satellite_x', 'is_satellite_y'])
m
/tmp/ipykernel_26336/2921760109.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  m['is_ioc'] = m['is_ioc_x'].fillna(m['is_ioc_y'])
/tmp/ipykernel_26336/2921760109.py:2: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  m['is_satellite'] = m['is_satellite_x'].fillna(m['is_satellite_y'])
Out[ ]:
lon lat Info ID ioc_code gloss_id country location connection dcp_id ... observations_arrived_per_month observations_expected_per_month observations_ratio_per_month observations_ratio_per_day sample_interval average_delay_per_day transmit_interval is_ioc is_satellite id
0 -179.7978 -38.2001 NaN NaN dnzc <NA> New Zealand DART New Zealand E Coast web NaN ... 2952 2976.0 99.0 100.0 15' 30' 6h True False dnzc
1 -179.7978 -38.2001 NaN NaN dnzc2 <NA> New Zealand DART New Zealand E Coast SZNZ43 5401000 ... 2928 2976.0 98.0 100.0 15' 209' 6h True False dnzc2
2 -178.5800 -51.4200 UJ040 SOUS00 SA040 NaN <NA> NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN False True UJ040 SOUS00 SA040
3 -178.5800 -29.7200 UJ211 SOUS00 SA211 NaN <NA> NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN False True UJ211 SOUS00 SA211
4 -178.5800 -1.9800 UJ466 SOUS00 SA466 NaN <NA> NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN False True UJ466 SOUS00 SA466
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3318 178.6037 -36.0999 NaN NaN dnzd2 <NA> New Zealand DART New Zealand SZNZ44 5501004 ... 2968 2976.0 100.0 100.0 15' 197' 6h True False dnzd2
3319 179.0962 -40.5992 NaN NaN dnzb <NA> New Zealand DART New Zealand E Coast web NaN ... 2976 2976.0 100.0 100.0 15' 30' 6h True False dnzb
3320 179.0962 -40.5992 NaN NaN dnzb2 <NA> New Zealand DART New Zealand E Coast SZNZ42 5501003 ... NaN NaN 0.0 0.0 15' NaN 6h True False dnzb2
3321 179.1949 -8.5033 NaN NaN fong 121 Tuvalu Islands Fongafale TV SZPS01 67440 ... 44613 44640.0 100.0 100.0 1' 2' 3' True False fong
3322 179.1950 -8.5250 Funafuti Tuvalu UH025 SOUS00 GL121 NaN <NA> NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN False False UH025 SOUS00 GL121

3323 rows × 29 columns

In [ ]:
m.hvplot.points(
    x='lon', y='lat', 
    c='is_ioc', 
    geo=True, 
    tiles = True, 
    cmap = "coolwarm", 
    hover_cols=["id"], 
).opts(
    width = 1800, 
    height=1000,
    title="False = STOFS list, True = IOC database", 
)
Out[ ]:
In [ ]:
m.hvplot.points(
    x='lon', y='lat', 
    c='is_satellite', 
    geo=True, 
    tiles = True, 
    cmap = "coolwarm", 
    hover_cols=["id"], 
).opts(
    width = 1800, 
    height=1000,
    title="Does the Point corresponds to setellite location? False / True", 
)
Out[ ]: