In [ ]:
import geopandas as gp
import pandas as pd
import searvey
import hvplot.pandas
In [ ]:
def get_meta() -> gp.GeoDataFrame:
meta_web = searvey.get_ioc_stations().drop(columns=["lon", "lat"])
meta_api = (
pd.read_json(
"http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
)
.drop_duplicates()
.drop(columns=["lon", "lat"])
.rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})
)
merged = pd.merge(
meta_web,
meta_api[["ioc_code", "lon", "lat"]].drop_duplicates(),
on=["ioc_code"],
)
return merged.drop(columns=["geometry"])
ioc_ = get_meta()
ioc_["is_ioc"] = True
ioc_['is_satellite'] = False
ioc_
Out[Â ]:
ioc_code | gloss_id | country | location | connection | dcp_id | last_observation_level | last_observation_time | delay | interval | ... | observations_expected_per_month | observations_ratio_per_month | observations_ratio_per_day | sample_interval | average_delay_per_day | transmit_interval | lon | lat | is_ioc | is_satellite | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | abas | 327 | Japan | Abashiri | SWJP40 | ABASHIRI | 2.12 | 06:29 | 17' | 10' | ... | 44640.0 | 100 | 99 | 1' | 7' | 10' | 144.290000 | 44.020000 | True | False |
1 | abed | <NA> | UK | Aberdeen | ftp | NaN | 3.36 | 2024-08-07 13:15 | 18h | 15' | ... | 2976.0 | 100 | 0 | 15' | NaN | 15' | -2.080000 | 57.140000 | True | False |
2 | abur | 82 | Japan | Aburatsu | SWJP40 | ABURATSU | 1.67 | 06:29 | 17' | 10' | ... | 44640.0 | 100 | 99 | 1' | 7' | 10' | 131.410000 | 31.580000 | True | False |
3 | acaj | 182 | El Salvador | Acajutla SV | SZXX01 | 300434064008810 | 0.95 | 06:39 | 7' | 5' | ... | 44640.0 | 89 | 99 | 1' | 5' | 5' | -89.838128 | 13.573792 | True | False |
4 | acap | 267 | Mexico | Acapulco MX | SEPA40 | 3540E15A | 8.26 | -down- | 2589d | 5' | ... | NaN | 0 | 0 | 1' | NaN | 5' | -99.916600 | 16.833300 | True | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1630 | zkth | <NA> | Greece | Zakynthos, Ionian Islands | bgan | GR-ZKTH-00 | 0.01 | 06:43 | 3' | 1' | ... | 44640.0 | 96 | 99 | 1' | 3' | 1' | 20.905200 | 37.781420 | True | False |
1631 | zldw | <NA> | Belgium | Zeebrugge Leopold II dam | web | NaN | 2.19 | 06:35 | 11' | 5' | ... | 8928.0 | 100 | 56 | 5' | 7' | 5' | 3.200278 | 51.346389 | True | False |
1632 | zwdw | <NA> | Belgium | Zeebrugge Wielingendok | web | NaN | 2.21 | 06:35 | 11' | 5' | ... | 8928.0 | 100 | 56 | 5' | 7' | 5' | 3.178056 | 51.355278 | True | False |
1633 | zygi | <NA> | Cyprus | Zygi | ftp | NaN | 1.91 | -down- | 3338d | 1' | ... | NaN | 0 | 0 | 0.5' | NaN | 1' | 33.338375 | 34.727083 | True | False |
1634 | zygi1 | <NA> | Cyprus | Zygi | bgan | ZYGI1 | NaN | 2022-03-18 00:47 | 874d | 1' | ... | NaN | 0 | 0 | 1' | NaN | 1' | 33.340228 | 34.726315 | True | False |
1635 rows × 26 columns
In [ ]:
mycols = [str(i) for i in range(6)] # we expect 17 cols max in that file
stof2d = pd.read_csv(
"https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
names=mycols,
sep="\t+|!",
header=None,
skiprows=1
)
stof2d['Info'] = stof2d.apply(lambda row: ' '.join(filter(None, row[2:])), axis=1)
stof2d['ID'] = stof2d['Info'].apply(lambda x: ' '.join(x.split()[:3]))
stof2d['Info'] = stof2d.apply(lambda row: row['Info'].replace(row['ID'], '').strip(), axis=1)
stof2d = stof2d.drop(columns=["2", "3", "4", "5"])
stof2d.rename(columns={"0": 'lon', "1": 'lat'}, inplace=True)
stof2d['is_satellite'] = stof2d.apply(lambda row: "SA" in row["ID"], axis=1)
stof2d['is_ioc'] = False
stof2d
/tmp/ipykernel_26336/2272989170.py:2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'. stof2d = pd.read_csv(
Out[Â ]:
lon | lat | Info | ID | is_satellite | is_ioc | |
---|---|---|---|---|---|---|
0 | -66.981003 | 44.903000 | ME Eastport | PSBM1 SOUS41 8410140 | False | False |
1 | -67.204002 | 44.654499 | ME Cutler Farris Wharf | CFWM1 SOUS41 8411060 | False | False |
2 | -68.203499 | 44.394001 | ME Bar Harbor | ATGM1 SOUS41 8413320 | False | False |
3 | -70.246002 | 43.656101 | ME Portland | CASM1 SOUS41 8418150 | False | False |
4 | -70.563301 | 43.320000 | ME Wells | WELM1 SOUS41 8419317 | False | False |
... | ... | ... | ... | ... | ... | ... |
1683 | -178.580000 | 58.960000 | UJ850 SOUS00 SA850 | True | False | |
1684 | -144.570000 | 58.960000 | UJ851 SOUS00 SA851 | True | False | |
1685 | -68.020000 | 58.960000 | UJ852 SOUS00 SA852 | True | False | |
1686 | -51.010000 | 58.960000 | UJ853 SOUS00 SA853 | True | False | |
1687 | -25.500000 | 58.960000 | UJ854 SOUS00 SA854 | True | False |
1688 rows × 6 columns
In [ ]:
# Add a temporary key column to each dataframe to ensure uniqueness
stof2d['_tmpkey'] = range(len(stof2d))
ioc_['_tmpkey'] = range(len(ioc_))
# Perform the outer merge on both the 'lon', 'lat', and the temporary key column
m = pd.merge(stof2d, ioc_, on=['lon', 'lat', '_tmpkey'], how='outer')
# Now drop the temporary key columns
m.drop('_tmpkey', axis=1, inplace=True)
In [ ]:
m
Out[Â ]:
lon | lat | Info | ID | is_satellite_x | is_ioc_x | ioc_code | gloss_id | country | location | ... | observations_ratio_per_week | observations_arrived_per_month | observations_expected_per_month | observations_ratio_per_month | observations_ratio_per_day | sample_interval | average_delay_per_day | transmit_interval | is_ioc_y | is_satellite_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -179.7978 | -38.2001 | NaN | NaN | NaN | NaN | dnzc | <NA> | New Zealand | DART New Zealand E Coast | ... | 82.0 | 2952 | 2976.0 | 99.0 | 100.0 | 15' | 30' | 6h | True | False |
1 | -179.7978 | -38.2001 | NaN | NaN | NaN | NaN | dnzc2 | <NA> | New Zealand | DART New Zealand E Coast | ... | 82.0 | 2928 | 2976.0 | 98.0 | 100.0 | 15' | 209' | 6h | True | False |
2 | -178.5800 | -51.4200 | UJ040 SOUS00 SA040 | True | False | NaN | <NA> | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
3 | -178.5800 | -29.7200 | UJ211 SOUS00 SA211 | True | False | NaN | <NA> | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
4 | -178.5800 | -1.9800 | UJ466 SOUS00 SA466 | True | False | NaN | <NA> | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3318 | 178.6037 | -36.0999 | NaN | NaN | NaN | NaN | dnzd2 | <NA> | New Zealand | DART New Zealand | ... | 100.0 | 2968 | 2976.0 | 100.0 | 100.0 | 15' | 197' | 6h | True | False |
3319 | 179.0962 | -40.5992 | NaN | NaN | NaN | NaN | dnzb | <NA> | New Zealand | DART New Zealand E Coast | ... | 100.0 | 2976 | 2976.0 | 100.0 | 100.0 | 15' | 30' | 6h | True | False |
3320 | 179.0962 | -40.5992 | NaN | NaN | NaN | NaN | dnzb2 | <NA> | New Zealand | DART New Zealand E Coast | ... | 0.0 | NaN | NaN | 0.0 | 0.0 | 15' | NaN | 6h | True | False |
3321 | 179.1949 | -8.5033 | NaN | NaN | NaN | NaN | fong | 121 | Tuvalu Islands | Fongafale TV | ... | 100.0 | 44613 | 44640.0 | 100.0 | 100.0 | 1' | 2' | 3' | True | False |
3322 | 179.1950 | -8.5250 | Funafuti Tuvalu | UH025 SOUS00 GL121 | False | False | NaN | <NA> | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3323 rows × 30 columns
In [ ]:
m['is_ioc'] = m['is_ioc_x'].fillna(m['is_ioc_y'])
m['is_satellite'] = m['is_satellite_x'].fillna(m['is_satellite_y'])
m["id"] = m["ID"].fillna(m["ioc_code"])
m = m.drop(columns=['is_ioc_x', 'is_ioc_y', 'is_satellite_x', 'is_satellite_y'])
m
/tmp/ipykernel_26336/2921760109.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)` m['is_ioc'] = m['is_ioc_x'].fillna(m['is_ioc_y']) /tmp/ipykernel_26336/2921760109.py:2: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)` m['is_satellite'] = m['is_satellite_x'].fillna(m['is_satellite_y'])
Out[Â ]:
lon | lat | Info | ID | ioc_code | gloss_id | country | location | connection | dcp_id | ... | observations_arrived_per_month | observations_expected_per_month | observations_ratio_per_month | observations_ratio_per_day | sample_interval | average_delay_per_day | transmit_interval | is_ioc | is_satellite | id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -179.7978 | -38.2001 | NaN | NaN | dnzc | <NA> | New Zealand | DART New Zealand E Coast | web | NaN | ... | 2952 | 2976.0 | 99.0 | 100.0 | 15' | 30' | 6h | True | False | dnzc |
1 | -179.7978 | -38.2001 | NaN | NaN | dnzc2 | <NA> | New Zealand | DART New Zealand E Coast | SZNZ43 | 5401000 | ... | 2928 | 2976.0 | 98.0 | 100.0 | 15' | 209' | 6h | True | False | dnzc2 |
2 | -178.5800 | -51.4200 | UJ040 SOUS00 SA040 | NaN | <NA> | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | False | True | UJ040 SOUS00 SA040 | |
3 | -178.5800 | -29.7200 | UJ211 SOUS00 SA211 | NaN | <NA> | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | False | True | UJ211 SOUS00 SA211 | |
4 | -178.5800 | -1.9800 | UJ466 SOUS00 SA466 | NaN | <NA> | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | False | True | UJ466 SOUS00 SA466 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3318 | 178.6037 | -36.0999 | NaN | NaN | dnzd2 | <NA> | New Zealand | DART New Zealand | SZNZ44 | 5501004 | ... | 2968 | 2976.0 | 100.0 | 100.0 | 15' | 197' | 6h | True | False | dnzd2 |
3319 | 179.0962 | -40.5992 | NaN | NaN | dnzb | <NA> | New Zealand | DART New Zealand E Coast | web | NaN | ... | 2976 | 2976.0 | 100.0 | 100.0 | 15' | 30' | 6h | True | False | dnzb |
3320 | 179.0962 | -40.5992 | NaN | NaN | dnzb2 | <NA> | New Zealand | DART New Zealand E Coast | SZNZ42 | 5501003 | ... | NaN | NaN | 0.0 | 0.0 | 15' | NaN | 6h | True | False | dnzb2 |
3321 | 179.1949 | -8.5033 | NaN | NaN | fong | 121 | Tuvalu Islands | Fongafale TV | SZPS01 | 67440 | ... | 44613 | 44640.0 | 100.0 | 100.0 | 1' | 2' | 3' | True | False | fong |
3322 | 179.1950 | -8.5250 | Funafuti Tuvalu | UH025 SOUS00 GL121 | NaN | <NA> | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | False | False | UH025 SOUS00 GL121 |
3323 rows × 29 columns
In [ ]:
m.hvplot.points(
x='lon', y='lat',
c='is_ioc',
geo=True,
tiles = True,
cmap = "coolwarm",
hover_cols=["id"],
).opts(
width = 1800,
height=1000,
title="False = STOFS list, True = IOC database",
)
Out[Â ]:
In [ ]:
m.hvplot.points(
x='lon', y='lat',
c='is_satellite',
geo=True,
tiles = True,
cmap = "coolwarm",
hover_cols=["id"],
).opts(
width = 1800,
height=1000,
title="Does the Point corresponds to setellite location? False / True",
)
Out[Â ]: