This notebook contains the methodology for:
- choosing the best IOC candidates for storm surge validation purposes
A subsequent edit of this notebook will cover:
- the data availability
- the data quality of the IOC candidates
import geopandas as gp
import pandas as pd
import searvey
from datetime import datetime
import numpy as np
import sklearn.neighbors
import xarray as xr
import hvplot.pandas
import os
/home/tomsail/miniconda3/envs/searvey/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
# Ocean/maritime sector polygons, used as a background layer for the maps below.
world_oceans = gp.read_file("https://gist.github.com/tomsail/2fa52d9667312b586e7d3baee123b57b/raw/23929561eaa8aa76376580a7df300c4e3eb2e509/world_maritime_sectors.json")
IOC_CLEANUP = "ioc_cleanup_2023.csv"
# Stations already cleaned for 2022-2023; normalise column names to lon/lat/location/country.
ioc_cleanup = pd.read_csv(IOC_CLEANUP, index_col=0).rename(columns={"longitude": 'lon', "latitude": 'lat', "Station_Name":"location","Country":"country"})
# Overlay the cleaned stations (black points) on the ocean sectors.
( world_oceans.hvplot(c='ocean',geo=True).opts(cmap='tab20c') *
ioc_cleanup.hvplot.points(x="lon",y="lat",c='k', s= 40,geo=True,coastline=True)
).opts(height=450)
WARNING:param.GeoOverlayPlot00566: Due to internal constraints, when aspect and width/height is set, the bokeh backend uses those values as frame_width/frame_height instead. This ensures the aspect is respected, but means that the plot might be slightly larger than anticipated. Set the frame_width/frame_height explicitly to suppress this warning.
Our experience with skill-panel-demo led us to the following conclusion:
The previous set of stations from ioc_cleanup
is not sufficient. We now need:
- more stations to compare with models
- more time coverage
We want a maximum of stations that coincide with STOFS2D output locations, as we want to compare: IOC observations vs our model vs STOFS2D output.
# The latest STOFS2D output locations (for STOFS2D version 2.1.0) are:
def get_stofs():
    """Download and parse the STOFS2D v2.1.0 elevation output-station list.

    Returns a DataFrame with columns:
      - ``lon`` / ``lat``: station coordinates
      - ``ID``: the first three whitespace-separated tokens of the station info
      - ``Info``: the remaining free-text description
    """
    mycols = [str(i) for i in range(6)]  # we expect at most 6 columns in that file
    stof2d = pd.read_csv(
        "https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
        names=mycols,
        sep="\t+|!",
        header=None,
        skiprows=1,
        # a regex separator requires the python engine; declaring it
        # explicitly silences the ParserWarning pandas emits otherwise
        engine="python",
    )
    # Columns 2..5 hold fragments of the station description: glue them back together.
    stof2d['Info'] = stof2d.apply(lambda row: ' '.join(filter(None, row[2:])), axis=1)
    # By convention the station ID is the first three tokens of the description.
    stof2d['ID'] = stof2d['Info'].apply(lambda x: ' '.join(x.split()[:3]))
    stof2d['Info'] = stof2d.apply(lambda row: row['Info'].replace(row['ID'], '').strip(), axis=1)
    stof2d = stof2d.drop(columns=["2", "3", "4", "5"])
    stof2d.rename(columns={"0": 'lon', "1": 'lat'}, inplace=True)
    return stof2d
# Download the STOFS2D station list and preview it on a map.
stofs = get_stofs()
stofs.hvplot.points(geo=True,coastline=True).opts(height=450)
/tmp/ipykernel_31579/2200778739.py:4: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'. stof2d = pd.read_csv( WARNING:param.GeoOverlayPlot00717: Due to internal constraints, when aspect and width/height is set, the bokeh backend uses those values as frame_width/frame_height instead. This ensures the aspect is respected, but means that the plot might be slightly larger than anticipated. Set the frame_width/frame_height explicitly to suppress this warning.
A caveat is that the 1D output files evolve over time:
# Three snapshots of the STOFS2D 1D output files, one per year, to check how
# the set of output stations evolved over time.
stofs1 = xr.open_dataset("stofs2d/20220912_stofs_2d_glo.t12z.points.swl.nc")
stofs2 = xr.open_dataset("stofs2d/20231010_stofs_2d_glo.t00z.points.swl.nc")
stofs3 = xr.open_dataset("stofs2d/20241229_stofs_2d_glo.t00z.points.swl.nc")
#SEE APPENDIX FOR DOWNLOADING STOFS2D DATA
# Keep only catalogue rows whose ID (first 3 tokens of station_name) appears
# in each yearly file; the trailing len() displays the station count.
stofs_2022 = stofs[stofs.ID.isin([' '.join(s.decode("utf-8").strip().split()[:3]) for s in stofs1.station_name.values])];len(stofs_2022)
stofs_2023 = stofs[stofs.ID.isin([' '.join(s.decode("utf-8").strip().split()[:3]) for s in stofs2.station_name.values])];len(stofs_2023)
stofs_2024 = stofs[stofs.ID.isin([' '.join(s.decode("utf-8").strip().split()[:3]) for s in stofs3.station_name.values])];len(stofs_2024)
562
1687
1688
Luckily, the new stations were appended at the end of the file, so it will be easier to concatenate data across all the files.
# Verify that the first 557 stations are identical across the yearly files
# (i.e. new stations were only ever appended at the end).
stofs_2022[:557].equals(stofs_2023[:557])
stofs_2022[:557].equals(stofs_2024[:557])
True
True
We need to compare model storm surge with observation. We use IOC tide stations
def get_meta() -> gp.GeoDataFrame:
    """Fetch IOC station metadata, taking coordinates from the IOC web API.

    The searvey catalogue supplies the station attributes while the IOC
    sea-level-monitoring service supplies the lon/lat values; the two are
    merged on ``ioc_code``.
    """
    # Station attributes from searvey (its lon/lat columns are replaced below).
    catalogue = searvey.get_ioc_stations().drop(columns=["lon", "lat"])

    # Coordinates straight from the IOC API; the response carries both
    # lowercase lon/lat and capitalised Lon/Lat — keep the latter.
    api_df = pd.read_json(
        "http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
    )
    api_df = api_df.drop_duplicates()
    api_df = api_df.drop(columns=["lon", "lat"])
    api_df = api_df.rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})

    coords = api_df[["ioc_code", "lon", "lat"]].drop_duplicates()
    combined = pd.merge(catalogue, coords, on=["ioc_code"])
    return combined.drop(columns=["geometry"])
ioc_ = get_meta()
We already have established a database for clean IOC data between 2022 and 2023 (see 1st plot), we'll use it as a reference:
# Overlay STOFS output stations (2022/2023/2024), all IOC stations, and the
# already-cleaned 2022-2023 stations on a single scatter plot.
stofs_plot = stofs_2022.hvplot.scatter(x= "lon", y="lat", hover_cols = "ID", s=130, c='lightgrey', label = 'STOFS 2022 output stations')
stofs_plot1 = stofs_2023.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=150, c='grey', label = 'STOFS 2023 output stations')
stofs_plot2 = stofs_2024.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=200, c='k', label = 'STOFS 2024 output stations')
ioc_plot = ioc_.hvplot.scatter(x="lon", y="lat",hover_cols = "ioc_code", s= 30 , c = 'y', label = 'all IOC stations')
# NOTE: `coastline=True` removed — hvplot ignores geo options for kind='scatter'
# (only points/paths/polygons etc. support them) and emits a warning.
ioc_cleanup_plot = ioc_cleanup.hvplot.scatter(x="lon", y="lat",s = 80, c='r', label = "stations cleaned for 2022-2023")
(stofs_plot2 * stofs_plot1 * stofs_plot * ioc_cleanup_plot* ioc_plot).opts(width = 1300, height = 600)
WARNING:param.main: geo option cannot be used with kind='scatter' plot type. Geographic plots are only supported for following plot types: ['bivariate', 'contour', 'contourf', 'dataset', 'hexbin', 'image', 'labels', 'paths', 'points', 'points', 'polygons', 'quadmesh', 'rgb', 'vectorfield']
We graphically detected all stations not already used in ioc_cleanup
and corresponding with STOFS2D output locations
# Short IOC codes, picked graphically, of stations that coincide with STOFS2D
# output locations but are not yet in ioc_cleanup (2022-2023).
station_to_add = [
"juan", "sanf", "anto", "ptmo", "valp", "ferg", "ambon", "bitu", "saum", "sho2", "ushu",
"espr", "gamb", "riki", "prud", "vald", "cord", "paak", "dsea", "ketc", "june", "skag", "sewa", "anch", "niki", "seld", "kodi", "alak",
"dshu", "dkod", "nome", "adak", "niko", "dchu", "midx", "fren", "sthl", "ascen", "jask", "chab", "kara", "musc",
"masi", "mais", "kerg", "syow", "ver1", "vern", "wait", "stpa", "sala", "tara", "marsh", "kwaj", "wake", "fong",
"solo", "vanu", "numbo", "numb2", "levu", "wlgt", "jack", "hako", "abas", "ofun", "mera", "toya", "nawi", "brpt", "heeia",
"moku", "mane", "john", "plmy", "xmas", "penr", "hiva", "pape", "raro", "pago", "pagx", "east", "garc", "Male2", "ganm", "male", "hani",
"mini", "coch", "vish", "chtt", "sitt", "moul", "ptbl", "komi", "kota", "lank", "ms001", "sab2", "saba", "vung", "quin",
"quar", "curri", "subi", "mani", "luba", "lega", "tkao", "tkee", "chij", "mins", "saip", "mala", "chuu", "kapi", "deke", "naur", "nauu",
"dumo", "espe", "porl", "hill", "waik", "lemba", "beno", "prgi", "prig", "cili", "cila", "tjls", "chrs", "ffcj", "cocb", "telu", "sibo",
"sib2", "tanjo", "bupo", "padn", "pada", "fpga", "winc", "wbnc", "oinc", "kpva", "leva", "simd", "wsdc", "cbmd", "ocmd", "cmnj", "phap",
"mhpa", "btny", "shnj", "mony", "ptme", "cwme", "epme", "hali", "nain", "nuk1", "nuuk", "qaqo", "reyk", "scor", "rptx", "cctx", "pitx",
"pric", "ftfr", "rose", "barb", "stcr", "lame", "isab", "vieq", "yobu", "yabu", "faja", "sanj", "arac", "maya", "magi", "penu", "mona",
"ptpr", "ptpl", "sama", "bull", "elpo", "limon", "quepo", "sana", "acaj", "acap", "acya", "manz", "mnza", "cabo", "fort", "call", "lobos",
"tala", "lali", "vkfl", "nafl", "fmfl", "spfl", "pnfl", "pbfl", "apfl", "tpfl", "fbfl", "moal", "wlms", "psla", "gila", "pfla", "ncla",
"apla", "eila", "cpla", "sptx", "gptx", "fptx", "bres", "sthm", "casc", "gibr", "ceut", "mars", "TR22", "gvd9", "alex", "palm", "pdas",
"plus", "dakar", "tako", "tkdi", "lagos", "pntn", "sitin", "walvi", "prte", "durb", "pemba", "mtwa", "momb", "lamu", "pmon", "aric", "mata",
"plat", "salv", "blueb",
# extra for Europe
"delf", "bork", "harl", "ters", "denh", "hoek", "kiel", "warn", "euro", "mpcw", "dunk", "boul", "diep", "leha", "ouis", "rosc", "stma", "jers", "leco", "audi", "tudy",
"lecy", "conc", "sain", "leso", "larp", "iaix", "port", "arca", "scoa", "bil3", "san2", "gij2", "vil2", "setu", "arri", "sagr", "albu", "huel",
"bon2", "cadi", "mal3", "motr", "alme", "carb", "murc2", "carg", "alac", "gand", "vale", "sagu", "tarr", "barc", "tst", "ptve", "ptln", "sete", "fosm",
"toul", "figu", "monc", "cent", "rous", "ajac", "sole", "GE25", "LA38", "LI11", "MC41", "PT17", "CF06", "CA02", "MRTM", "PA07", "usti", "matel",
"ME13", "RC09", "ST44", "GI20", "PDCR", "PLBR", "SC43", "PE09", "PE21", "PRTP", "CT03", "pant", "PSCA", "ppcp", "CI20", "AZ42", "PO40", "SA16", "GA37",
"NA23", "PL14", "CETR", "RCCL", "CR08", "lcst", "TA18", "tara1", "OT15", "MNPL", "BA05", "BRLT", "VI12", "IT45", "OR24", "SB36", "AN15", "RA10", "VE19",
"baka", "stari", "vela", "sobr", "corf", "prev", "zkth", "kata", "kypa", "kala", "koro", "kaps", "kast", "pale", "hrak", "iera", "kaso", "aigi", "pano",
"peir", "noat", "syro", "myko", "delo", "thes", "smth", "gokc", "bozc", "plom", "ment", "bodr", "kos", "plim", "kalt", "mrms", "feth", "bozy", "tasu",
"erdem", "arsu", "iske", "girn", "papho", "leme", "zygi", "larn", "para", "gazi", "batr", "haif", "hade", "ashd", "askl", "psail", "matr", "mang", "csta",
"sino", "kaci", "sams", "trab", "elja", "said"
]
Some stations can appear under several different names/codes:
# Expand each short code in `station_to_add` to every IOC code that contains
# it as a substring (a station can be registered under several variants,
# e.g. "alex" matches "alex", "alex2", "alex3").
all_ioc = ioc_.ioc_code.values
possible_stations = [
    code
    for wanted in station_to_add
    for code in all_ioc
    if wanted in code
]
ioc_to_add = ioc_[ioc_.ioc_code.isin(possible_stations)]
ioc_to_add
ioc_code | gloss_id | country | location | connection | contacts | added_to_system | observations_arrived_per_week | observations_expected_per_week | observations_ratio_per_week | ... | sample_interval | average_delay_per_day | transmit_interval | dcp_id | last_observation_level | last_observation_time | delay | interval | lon | lat | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | abas | 327 | Japan | Abashiri | SWJP40 | Japan Meteorological Agency ( Japan ) | 2012-03-21 09:54:59 | 10040 | 10080.0 | 100 | ... | 1' | 8' | 10' | ABASHIRI | 1.66 | 07:59 | 28' | 10' | 144.290000 | 44.020000 |
3 | acaj | 182 | El Salvador | Acajutla SV | SZXX01 | Ministerio de Medio Ambiente y Recursos Natura... | 2008-06-20 12:17:00 | 9750 | 10080.0 | 97 | ... | 1' | 7' | 5' | 300434064008810 | 0.56 | 08:09 | 18' | 5' | -89.838128 | 13.573792 |
4 | acap | 267 | Mexico | Acapulco MX | SEPA40 | Centro de Investigación CientÃfica y de Educac... | 2008-04-28 12:36:00 | -down- | 10080.0 | 0 | ... | 1' | NaN | 5' | 3540E15A | 8.26 | -down- | 2799d | 5' | -99.916600 | 16.833300 |
5 | acap2 | 267 | Mexico | Acapulco API | SOMX10 | Universidad Nacional Autónoma de México ( Mexi... | 2014-05-19 10:50:47 | 10020 | 10080.0 | 99 | ... | 1' | 9' | 10' | 0100D7CA | 4.37 | 08:06 | 21' | 10' | -99.903000 | 16.837933 |
9 | acya | 267 | Mexico | Acapulco Club de Yates | ftp | Universidad Nacional Autónoma de México ( Mexi... | 2010-08-10 09:24:41 | NaN | 10080.0 | 0 | ... | 1' | NaN | 10' | NaN | 1.31 | 2025-03-05 14:59 | 17h | 10' | -99.902980 | 16.837990 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1631 | yabu | <NA> | USA | Yabucoa Harbor, PR | SXXX03 | Puerto Rico Seismic Network ( USA ) +National ... | 2010-03-26 14:06:00 | 9828 | 10080.0 | 98 | ... | 1' | 7' | 6' | 3366B5CA | -6.27 | 08:08 | 19' | 6' | -65.833000 | 18.055100 |
1636 | yobu | <NA> | Puerto Rico | Yobucou PR | SXXX03 | NaN | 2006-06-07 04:30:00 | NaN | NaN | 0 | ... | ' | NaN | 6' | 3366B5CA | NaN | NaN | NaN | 6' | -65.833000 | 18.050100 |
1643 | zkth | <NA> | Greece | Zakynthos, Ionian Islands | bgan | National Observatory of Athens ( Greece ) | 2023-09-29 06:31:39 | 10032 | 10080.0 | 100 | ... | 1' | 1' | 1' | GR-ZKTH-00 | 0.01 | 08:25 | 2' | 1' | 20.905200 | 37.781420 |
1646 | zygi | <NA> | Cyprus | Zygi | ftp | Cyprus Oceanography Center ( Cyprus ) | 2011-09-07 14:55:04 | -down- | 20160.0 | 0 | ... | 0.5' | NaN | 1' | NaN | 1.91 | -down- | 3548d | 1' | 33.338375 | 34.727083 |
1647 | zygi1 | <NA> | Cyprus | Zygi | bgan | Cyprus Marine and Maritime Institute ( Cyprus ... | 2018-03-16 05:24:01 | NaN | 10080.0 | 0 | ... | 1' | NaN | 1' | ZYGI1 | NaN | 2022-03-18 00:47 | 1084d | 1' | 33.340228 | 34.726315 |
550 rows × 24 columns
# Same overview as before, now highlighting the stations to be added (green)
# next to the already-cleaned ones (red).
stofs_plot = stofs_2022.hvplot.scatter(x= "lon", y="lat", hover_cols = "ID", s=130, c='lightgrey', label = 'STOFS 2022 output stations')
stofs_plot1 = stofs_2023.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=150, c='grey', label = 'STOFS 2023 output stations')
stofs_plot2 = stofs_2024.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=200, c='k', label = 'STOFS 2024 output stations')
ioc_cleanup_plot = ioc_cleanup.hvplot.scatter(x="lon", y="lat",hover_cols = "ioc_code",s = 90, c='r',label = 'stations already cleaned for 2022-2023')
# NOTE: `coastline=True` removed — hvplot ignores geo options for kind='scatter'
# and emits a warning (geographic plots need kind='points').
ioc_to_add_plot = ioc_to_add.hvplot.scatter(x="lon", y="lat",hover_cols = "ioc_code", s = 90, c = 'g', label = 'stations to be added')
(stofs_plot2 * stofs_plot1 * stofs_plot * ioc_to_add_plot * ioc_cleanup_plot).opts(width = 1400, height = 600)
WARNING:param.main: geo option cannot be used with kind='scatter' plot type. Geographic plots are only supported for following plot types: ['bivariate', 'contour', 'contourf', 'dataset', 'hexbin', 'image', 'labels', 'paths', 'points', 'points', 'polygons', 'quadmesh', 'rgb', 'vectorfield']
the 2024 IOC cleanup database is the red + green points
# The 2024 cleanup database = previously cleaned stations + the new additions.
ioc_cleanup_2024 = pd.concat([ioc_cleanup,ioc_to_add])
ioc_cleanup_2024
location | ioc_code | gloss_id | lat | lon | country | connection | contacts | dcp_id | last_observation_level | ... | number_of_years | time_zone_hours | datum_information | instrument | precision | null_value | gauge_type | overall_record_quality | gesla3_id | seaset_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
125 | Base O'Higgins | ohig | <NA> | -63.321000 | -57.901000 | Antarctica | SXCH40 | Servicio Hidrográfico y Oceanográfico de la Ar... | ADC04BE6 | 1.75 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 125.0 |
135 | Puerto Deseado | dese | 190.0 | -47.754000 | -65.915000 | Argentina | SEPO40 | Armada Argentina Servicio de HidrografÃa Naval... | 33912088 | 3.69 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 135.0 |
136 | Puerto Madryn | madry | 191.0 | -42.763000 | -65.031000 | Argentina | SEPO40 | Armada Argentina Servicio de HidrografÃa Naval... | 335665D2 | 6.60 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 136.0 |
139 | Battery Point | bapj | <NA> | -42.892000 | 147.338000 | Australia | SZAU01 | National Tidal Centre/Australian Bureau of Met... | 61221 | 0.91 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 139.0 |
140 | Broome | brom | 40.0 | -18.001000 | 122.219000 | Australia | SZAU01 | National Tidal Centre/Australian Bureau of Met... | 62650 | 7.69 | ... | 32.0 | 0.0 | Unspecified | Unspecified | Unspecified | -99.9999 | Coastal | No obvious issues | Broome | 140.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1631 | Yabucoa Harbor, PR | yabu | <NA> | 18.055100 | -65.833000 | USA | SXXX03 | Puerto Rico Seismic Network ( USA ) +National ... | 3366B5CA | -6.27 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1636 | Yobucou PR | yobu | <NA> | 18.050100 | -65.833000 | Puerto Rico | SXXX03 | NaN | 3366B5CA | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1643 | Zakynthos, Ionian Islands | zkth | <NA> | 37.781420 | 20.905200 | Greece | bgan | National Observatory of Athens ( Greece ) | GR-ZKTH-00 | 0.01 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1646 | Zygi | zygi | <NA> | 34.727083 | 33.338375 | Cyprus | ftp | Cyprus Oceanography Center ( Cyprus ) | NaN | 1.91 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1647 | Zygi | zygi1 | <NA> | 34.726315 | 33.340228 | Cyprus | bgan | Cyprus Marine and Maritime Institute ( Cyprus ... | ZYGI1 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
719 rows × 77 columns
def find_nearest_nodes(
    mesh_nodes: pd.DataFrame,
    points: pd.DataFrame,
    metric: str = "haversine",
    earth_radius = 6371000,
):
    """
    Match each of the specified `points` to its nearest mesh node.

    Both `mesh_nodes` and `points` must be `pandas.DataFrame`s that have
    columns named `lon` and `lat` and the coords must be in EPSG:4326.

    Returns the `points` DataFrame augmented with these extra columns:
    - `mesh_index` which is the index of the node in the `hgrid.gr3` file
    - `mesh_lon` which is the longitude of the nearest mesh node
    - `mesh_lat` which is the latitude of the nearest mesh node
    - `distance` which is the distance in meters between the point and the nearest mesh node

    Examples:
        >>> mesh_nodes = pd.DataFrame({
        ...     "lon": [0, 10, 20],
        ...     "lat": [0, 5, 0],
        ... })
        >>> points = pd.DataFrame({
        ...     "lon": [1, 11, 21],
        ...     "lat": [1, 4, 1],
        ...     "id": ["a", "b", "c"],
        ... })
        >>> nearest_nodes = find_nearest_nodes(mesh_nodes, points)
        >>> nearest_nodes
           lon  lat id  mesh_index  mesh_lon  mesh_lat       distance
        0    1    1  a           0         0         0  157249.381272
        1   11    4  b           1        10         5  157010.162641
        2   21    1  c           2        20         0  157249.381272
    """
    # BallTree with the haversine metric expects (lat, lon) pairs in radians.
    node_coords = np.radians(mesh_nodes[["lat", "lon"]])
    ball_tree = sklearn.neighbors.BallTree(node_coords, metric=metric)

    query_coords = np.radians(points[["lat", "lon"]].values)
    dist_rad, node_idx = ball_tree.query(query_coords)

    # Haversine distances come back in radians on the unit sphere;
    # scale by the earth radius to obtain meters.
    matched = mesh_nodes.rename(columns={"lon": "mesh_lon", "lat": "mesh_lat"})
    matched = matched.iloc[node_idx.flatten()]
    matched = matched.assign(distance=dist_rad.flatten() * earth_radius)
    matched = matched.reset_index(names=["mesh_index"])

    return pd.concat((points.reset_index(drop=True), matched), axis="columns")
# 2 - get STOFS
# Match every station of the 2024 cleanup set against the nearest STOFS2D
# output node, for each yearly station list.
nearest_nodes_2022 = find_nearest_nodes(stofs_2022, ioc_cleanup_2024[["lon","lat","ioc_code","location"]])
nearest_nodes_2023 = find_nearest_nodes(stofs_2023, ioc_cleanup_2024[["lon","lat","ioc_code","location"]])
nearest_nodes_2024 = find_nearest_nodes(stofs_2024, ioc_cleanup_2024[["lon","lat","ioc_code","location"]])
# Drop rows with no matched node at all.
nearest_nodes_2022 = nearest_nodes_2022[~nearest_nodes_2022.mesh_index.isna()]
nearest_nodes_2023 = nearest_nodes_2023[~nearest_nodes_2023.mesh_index.isna()]
nearest_nodes_2024 = nearest_nodes_2024[~nearest_nodes_2024.mesh_index.isna()]
# Keep only stations within 5 km of a STOFS2D output node and persist them.
keep_nodes_2022 = nearest_nodes_2022[nearest_nodes_2022.distance < 5000]
keep_nodes_2023 = nearest_nodes_2023[nearest_nodes_2023.distance < 5000]
keep_nodes_2024 = nearest_nodes_2024[nearest_nodes_2024.distance < 5000]
keep_nodes_2022.to_csv("keep_nodes_2022.csv")
keep_nodes_2023.to_csv("keep_nodes_2023.csv")
keep_nodes_2024.to_csv("keep_nodes_2024.csv")
In red: all the STOFS2D points to be extracted.
# Map view: STOFS 2022 stations (grey) and the 2024 cleanup set (black).
# The variant including the selected STOFS2D extraction points (k2, red)
# is kept below, commented out.
p2 = stofs_2022.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=70, c='grey',line_color="lightgrey", label = 'STOFS 2022 output stations')
ip = ioc_cleanup_2024.hvplot.scatter(x="lon", y="lat",s = 10, c='k',coastline=True, label = 'IOC_CLEANUP 2022-2024')
k2 = keep_nodes_2022.hvplot.scatter(x="lon", y="lat", c = 'red', s = 20,coastline=True, label = "STOFS2D stations to be extracted")
# (world_oceans.hvplot(c='ocean',alpha= 0.9).opts(cmap='tab20c') * p2 * ip * k2 ).opts(width = 1100, height = 800)
(world_oceans.hvplot(c='ocean',alpha= 0.9).opts(cmap='tab20c') * p2 * ip ).opts(width = 1500, height = 900)
WARNING:param.main: geo option cannot be used with kind='scatter' plot type. Geographic plots are only supported for following plot types: ['bivariate', 'contour', 'contourf', 'dataset', 'hexbin', 'image', 'labels', 'paths', 'points', 'points', 'polygons', 'quadmesh', 'rgb', 'vectorfield'] WARNING:param.main: geo option cannot be used with kind='scatter' plot type. Geographic plots are only supported for following plot types: ['bivariate', 'contour', 'contourf', 'dataset', 'hexbin', 'image', 'labels', 'paths', 'points', 'points', 'polygons', 'quadmesh', 'rgb', 'vectorfield']
download IOC data
# Download the 2022-2024 IOC observations for every station in the cleanup
# set, caching each station as a parquet file under data/. Already-downloaded
# stations are skipped.
os.makedirs("data", exist_ok=True)  # avoid FileNotFoundError on first run
for i_s, station in ioc_cleanup_2024.iterrows():
    if os.path.exists(f"data/{station.ioc_code}.parquet"):
        print(station.ioc_code, "done")
    else:
        print(station.ioc_code, "downloading..")
        df = searvey.fetch_ioc_station(station.ioc_code, "2022-01-01", "2024-12-31")
        df.to_parquet(f"data/{station.ioc_code}.parquet")
ohig done dese done madry done bapj done brom done barn done djve done darw done pkem done pmur done ross done sprg done thev done trst done oste done bele done bamf done prin done stjo done vhbc done greg done ohig3 done cald done coqu done corr done pich done ptal done pcha done pwil done qtro done quir done talc done viti done cher done dzao done herb done mare done nuku done stqy done stqy2 done tubua done cuxh done helg done horn done itea done LA23 done abur done fuka done hmda done hana done ishig done kusm done kush done naga done naha done omae done sado done saig done tosa done waka done kant done huat done chst done ande done honn done malo done rorv done treg done vard done davo done dapi done hie2 done fue2 done coru done arko done fors done furu done gokr done holm done kalit done karl done klag done kung done kungr done land done olan done oska done oxel done rata done simp done simr done smog done spik done visb done zanz done nkfa done amas done anta done igne done sile done abed done bang done crom done dove done harw done heys done holy done holy2 done ilfa done kinl done leit done lerw done lerw2 done live done lowe done mhav done mill done mumb done nhav done newl done newl2 done npor done nshi done plym done porp done prus done ptmt done shee done stor done whit done wick done work done alam done aren done asto done atka done acnj done bamd done bame done benc done boma done bgct done amal done chrp done cwfl done cres done dkwa done datu done dpnc done dutc done elak done fpnt done guam done hilo done kahu done kawa done kwfl done lajo done lime done pagb done pslu done sdpt done sitk done wpwa done wood done yaku done colo done dnze done prin2 done abas done acaj done acap done acap2 done acya done adak done adak2 done aigi done ajac done ajac2 done alac1 done alac2 done alak done alak2 done albu done alex done alex1 done alex2 done alex3 done alme done alme2 done ambon done AN15 done anch done anch2 done anto done anto2 done apfl done apfl2 
done apla done apla2 done arac done aracS done arca done arca2 done aric done aric2 done arri done arsu done ascen done ashd done ashd1 done askl done audi done audi2 done AZ42 done BA05 done baka done barb done barb2 done barc done barc2 done batr done beno done bil3 done bitu done blueb done bodr done bodri done bon2 done bork done boul done boul2 done bozc done bozy done bres done bres2 done BRLT done brpt done btny done btny2 done bull done bupo done CA02 done cabo done cadi done call done carb done carg done casc done cbmd done cbmd2 done cctx done cctx2 done cent done cent2 done CETR done ceut done ceut1 done CF06 done chab done chij done chrs done chtt done chuu done CI20 done cila done cili done cmnj done cmnj2 done cocb done coch done conc done conc2 done cord done cord2 done corf done cpla done cpla2 done CR08 done csta done csta2 done csta3 done CT03 done curri done cwme done cwme2 done dakar done dchu done dchu2 done deke done delf done delo done denh done diep done diep2 done dkod done dkod2 done dsea done dsea2 done dshu done dshu2 done dumo done dunk done dunk2 done durb done east done east2 done eila done elja1 done elpo done epme done epme2 done erdem done espe done espr done euro done faja done fbfl done fbfl2 done ferg done feth done ffcj done figu done figu2 done fmfl done fmfl2 done fong done fort done fort2 done fosm done fosm2 done fpga done fpga2 done fptx done fren done ftfr done ftfr2 done GA37 done gamb done gand done ganm done garc done gazi done GE25 done GI20 done gibr done gibr2 done gibr3 done gij2 done gila done gila2 done girn done gokc done gptx done gptx2 done gvd9 done hade done hade2 done haif done hako done hali done hani done harl done heeia done hill done hiva done hoek done hrak done htst done huel done iaix done iera done isab done iske done IT45 done jack done jask done jask2 done jers done john done juan done juan2 done june done kaci done kala done kalt done kapi done kaps done kara done kaso done kast done kata done 
kerg done kerg2 done ketc done kiel done kodi done kodi2 done komi done koro done kos1 done kos2 done kota done kpva done kwaj done kwaj2 done kypa done LA38 done lagos done lali done lame done lame2 done lamu done lank done larn done larn2 done larp done larp2 done lcst done leco done leco2 done lecy done lega done leha done leha2 done lemba done leme done leso done leso2 done leva done levu done LI11 done limon done lobos done luba done magi done magi2 done mais done mais2 done mal3 done mala done male done Male2 done mane done mang done mani done manz done mars done marsh done masi done masi2 done mata done matel done matr done maya done MC41 done ME13 done ment done mera done mhpa done midx done midx2 done mini done mins done MNPL done mnza done moal done moku done momb done mona done mona2 done monc done monc2 done mony done mony2 done motr done moul done mpcw done mrms done MRTM done ms001 done mtwa done murc2 done musc done musc3 done myko done NA23 done nafl done nain done naur done nauu done nawi done nawi2 done ncla done niki done niko done noat done nome done nome2 done nuk1 done numb2 done numbo done nuuk done ocmd done ofun done oinc done OR24 done OT15 done ouis done ouis2 done PA07 done paak done paak2 done pada done padn done pago done pago2 done pagx done pale done palm done palm1 done pano done pant done pape done pape2 done papho done para done pbfl done pdas done PDCR done PE09 done PE21 done peir done pemba done penr done penu done pfla done pitx done PL14 done plat done plat2 done PLBR done plim done plmy done plom done pmon done pmon2 done pnfl done pnfl2 done pntn done PO40 done porl done port done port2 done ppcp done prev done prgi done pric done prig done prte done PRTP done prud done prud2 done PSCA done psla done psla2 done PT17 done ptbl done ptln done ptln2 done ptme done ptpl done ptpr done ptve done ptve2 done qaqo done quar done quepo done quin done RA10 done raro done raro2 done RC09 done RCCL done reyk done riki done rosc done 
rosc2 done rose done rous done rous2 done rptx done SA16 done sab2 done saba done sagr done sagu done said done sain done sain2 done saip done sala done salav done salv done salv3 done sama done sama2 done sams done san2 done sana done sanf done sanf3 done sanj done sanj2 done saum done SB36 done SC43 done scoa done scoa2 done scor done seld done sete done sete2 done setu done sewa done sewa2 done shnj done sho2 done sib2 done sibo done simd done sino done sitin done sitt done sjuan done skag done skagk done smth downloading.. sobr downloading.. sole downloading.. sole2 downloading.. solo done spfl done sptx done ST44 downloading..
IOC-ST44: No data. Creating a dummy dataframe
stari downloading.. stcr done stcr2 done sthl done sthl2 done sthm done stma downloading.. stma2 downloading.. stpa done subi done syow done syro downloading.. TA18 downloading.. tako done tala done tanjo done tara done tara1 done tarr downloading.. tasu downloading.. telu done ters downloading.. thes downloading.. tjls done tkao done tkdi done tkee done toul downloading.. toul2 downloading.. toya done tpfl done TR22 done trab downloading.. tst1 downloading.. tst2 downloading..
IOC-tst2: Dropped duplicates: 15477 rows IOC-tst2: Dropped duplicates: 15194 rows IOC-tst2: Dropped duplicates: 3565 rows IOC-tst2: Dropped duplicates: 26706 rows IOC-tst2: Dropped duplicates: 13797 rows IOC-tst2: Dropped duplicates: 31348 rows IOC-tst2: Dropped duplicates: 20907 rows IOC-tst2: Dropped duplicates: 11034 rows
tudy downloading.. ushu done usti downloading.. vald done vald2 done vale downloading.. valp done valp2 done valp3 done vanu done VE19 downloading.. vela downloading..
IOC-vela: No data. Creating a dummy dataframe
ver1 done vern done VI12 downloading.. vieq done vieq2 done vil2 downloading.. vish done vkfl done vung done waik done wait done wake done wake2 done walvi done warn downloading.. wbnc done winc done wlgt done wlms done wlms2 done wsdc done xmas done yabu done yobu done zkth downloading.. zygi downloading..
IOC-zygi: No data. Creating a dummy dataframe
zygi1 downloading..
check data availability
import re
import glob

# Count, per station, how many usable sensor channels were downloaded.
ioc_cleanup_2024['n_sensors'] = 0
pattern = r"data/([\w\d]+)\.parquet"
stations = [re.search(pattern, path).group(1) for path in glob.glob("data/*parquet")]
f"Total stations: {len(stations)}"
# get the stations with data
keep_stations = []
for station in sorted(stations):
    df = pd.read_parquet(f"data/{station}.parquet")
    # only stations whose download yielded a non-empty frame are kept
    if not df.empty:
        print(station, end=" ")
        keep_stations.append(station)
        # disregard sw1, sw2 and bat: they are not sea-level channels
        df = df.drop(columns=[col for col in ["sw1", "sw2", "bat"] if col in df.columns])
        print(list(df.columns))
        ioc_cleanup_2024.loc[ioc_cleanup_2024.ioc_code == station, "n_sensors"] = len(df.columns)
f"Stations with data: {len(keep_stations)}"
'Total stations: 719'
AN15 ['rad'] AZ42 ['rad'] BA05 ['rad'] BRLT ['rad'] CA02 ['rad'] CETR ['pr1', 'pr2'] CF06 ['rad'] CI20 ['rad'] CR08 ['rad'] CT03 ['rad'] GA37 ['rad'] GE25 ['rad'] GI20 ['prs'] IT45 ['rad'] LA23 ['rad'] LA38 ['rad'] LI11 ['rad'] MC41 ['rad'] ME13 ['rad'] MNPL ['rad'] MRTM ['pr1', 'pr2'] Male2 ['prs', 'ra2', 'rad'] NA23 ['rad'] OR24 ['rad'] OT15 ['rad'] PA07 ['rad'] PDCR ['pr1', 'pr2'] PE09 ['prs'] PL14 ['rad'] PLBR ['pr1'] PO40 ['rad'] PRTP ['pr1', 'pr2'] PSCA ['pr1', 'pr2'] PT17 ['rad'] RA10 ['rad'] RC09 ['rad'] RCCL ['pr1', 'pr2'] SA16 ['rad'] SB36 ['rad'] SC43 ['rad'] TA18 ['rad'] TR22 ['rad'] VE19 ['rad'] VI12 ['rad'] abas ['rad'] abed ['bub'] abur ['rad'] acaj ['atm', 'prs', 'ra2', 'rad'] acap2 ['rad'] acnj ['wls'] acya ['flt'] adak ['wls'] adak2 ['pwl'] aigi ['rad'] ajac ['rad'] ajac2 ['rad'] alac1 ['rad'] alac2 ['rad'] alak ['pwl'] alak2 ['pwl'] alam ['pwl'] albu ['rad'] alex1 ['rad'] alex2 ['rad'] alex3 ['pwl'] alme ['rad'] alme2 ['rad'] amal ['pwl'] amas ['rad'] ambon ['enc', 'prs', 'rad'] anch ['pwl'] anch2 ['pwl'] ande ['flt'] anta ['rad'] anto ['prs', 'rad'] anto2 ['prs', 'rad'] apfl ['pwl'] apfl2 ['pwl'] apla ['wls'] apla2 ['pwl'] arac ['prs'] aracS ['pwl'] arca ['rad'] arca2 ['rad'] aren ['pwl'] aric ['prs', 'rad'] aric2 ['prs', 'rad'] arko ['rad'] arri ['rad'] arsu ['rad'] ascen ['atm', 'prs', 'ra2', 'rad'] asto ['pwl'] atka ['wls'] audi ['rad'] audi2 ['rad'] baka ['rad'] bamd ['pwl'] bame ['pwl'] bamf ['enc'] bang ['bub'] bapj ['rad'] barb2 ['pwl'] barc ['rad'] barc2 ['ra2', 'rad', 'atm'] barn ['aqu'] batr ['rad'] bele ['enc', 'rad'] benc ['pwl'] beno ['prs', 'rad', 'ras'] bgct ['pwl'] bil3 ['rad'] bitu ['prs', 'rad', 'ras'] blueb ['prs', 'rad'] bodr ['ecs'] bodri ['rad'] boma ['pwl'] bon2 ['rad'] bork ['flt'] boul ['rad'] boul2 ['rad'] bozc ['rad'] bozy ['rad'] bres ['rad'] bres2 ['rad'] brom ['aqu'] brpt ['prs', 'rad', 'stp', 'ra2'] btny ['pwl'] btny2 ['pwl'] bull ['bub', 'prs', 'rad'] bupo ['rad'] cadi ['rad'] cald ['prs', 'rad'] call ['prs', 
'rad', 'enc'] carb ['rad'] carg ['rad'] casc ['rad'] cbmd ['pwl'] cbmd2 ['pwl'] cent ['rad'] cent2 ['rad'] ceut ['rad'] ceut1 ['rad'] cher ['rad'] chij ['rad'] chrp ['pwl'] chrs ['rad'] chst ['prs'] chtt ['enc', 'prs', 'ra2', 'rad'] chuu ['rad', 'ra2', 'prs'] cili ['prs', 'ra2', 'ra3', 'rad'] cmnj ['pwl'] cmnj2 ['pwl'] cocb ['aqu'] coch ['prs'] colo ['prs', 'ra2', 'rad'] conc ['rad'] conc2 ['rad'] coqu ['prs', 'rad'] cord ['pwl'] cord2 ['pwl'] corf ['rad'] corr ['prs', 'rad'] coru ['flt'] cpla ['wls'] cpla2 ['pwl'] cres ['pwl'] crom ['bub'] csta2 ['rad'] csta3 ['prs'] curri ['enc', 'prs', 'rad'] cuxh ['flt'] cwfl ['pwl'] cwme ['pwl'] cwme2 ['pwl'] dakar ['prs', 'ra2', 'rad'] dapi ['prt', 'prte'] darw ['aqu'] datu ['prt', 'prte'] davo ['prs', 'ra2', 'rad', 'ras'] dchu ['prt', 'prte'] dchu2 ['prt'] deke ['prs'] delf ['flt'] denh ['flt'] dese ['prs', 'rad', 'enc'] diep ['rad'] diep2 ['rad'] djve ['prt', 'prte'] dkod ['prt', 'prte'] dkod2 ['prt'] dkwa ['prt', 'prte'] dnze ['prt', 'prte'] dove ['bub'] dpnc ['wls'] dsea ['prt', 'prte'] dsea2 ['prt', 'prte'] dshu ['prt'] dumo ['prs'] dunk ['rad'] dunk2 ['rad'] dutc ['wls'] dzao ['rad'] east ['prs', 'rad'] east2 ['prs', 'rad'] elak ['pwl'] elja1 ['pwl'] elpo ['prs', 'ra2', 'ra3', 'rad'] epme ['pwl'] epme2 ['pwl'] erdem ['rad'] espe ['aqu'] euro ['flt'] faja ['pwl'] fbfl ['pwl'] fbfl2 ['pwl'] ferg ['aqu'] ffcj ['rad'] figu ['rad'] figu2 ['rad'] fmfl ['pwl'] fmfl2 ['pwl'] fong ['aqu'] fors ['rad'] fort ['enc', 'rad'] fort2 ['enc', 'rad'] fosm ['rad'] fosm2 ['rad'] fpga ['wls'] fpga2 ['pwl'] fpnt ['wls'] fren ['prs', 'ra2', 'rad'] ftfr ['rad'] ftfr2 ['rad'] fue2 ['rad'] fuka ['rad'] furu ['rad'] gamb ['prs', 'rad'] gand ['rad'] ganm ['prs', 'ra2', 'ra3', 'rad', 'ras'] garc ['enc', 'prs', 'rad'] gibr2 ['rad'] gibr3 ['atm', 'pr1', 'pr2', 'rad'] gij2 ['rad'] gila ['wls'] gokc ['ecs'] gokr ['rad'] gptx ['bwl'] greg ['prs', 'rad'] guam ['wls'] hade2 ['rad'] hako ['rad'] hana ['rad'] hani ['prs', 'ra2', 'rad'] harl ['flt'] harw 
['bub'] heeia ['rad'] helg ['flt'] herb ['rad'] heys ['bub'] hie2 ['rad'] hill ['aqu'] hilo ['wls'] hiva ['pr2', 'prs'] hmda ['rad'] hoek ['flt'] holm ['rad'] holy2 ['bub'] honn ['flt'] horn ['flt'] hrak ['rad'] huat ['rad'] huel ['rad'] iaix ['rad'] iera ['rad'] igne ['rad'] ilfa ['bub'] isab ['pwl'] ishig ['rad'] itea ['rad'] jack ['rad'] jers ['bub'] john ['enc', 'prs', 'rad'] juan ['prs', 'rad'] juan2 ['prs', 'rad'] kahu ['pwl'] kala ['pr1'] kalit ['rad'] kant ['enb', 'enc', 'prs', 'rad', 'stp'] kapi ['enb', 'enc', 'prs', 'rad'] kaps ['rad'] kara ['pr1', 'rad'] karl ['rad'] kaso ['rad'] kast ['rad'] kata ['pr1'] kawa ['pwl'] kerg ['prs'] kerg2 ['rad'] kiel ['flt'] kinl ['bub'] klag ['rad'] kodi ['wls'] kodi2 ['pwl'] koro ['rad'] kos1 ['rad'] kos2 ['rad'] kung ['rad'] kungr ['rad'] kush ['rad'] kusm ['rad'] kwaj ['wls'] kwaj2 ['pwl'] kwfl ['wls'] lajo ['bwl'] lali ['prs', 'rad', 'ras'] lame ['pwl'] lame2 ['pwl'] lamu ['enc', 'prs', 'rad'] land ['rad'] lank ['prs', 'ra2', 'rad', 'ras'] larn2 ['rad'] larp ['rad'] larp2 ['rad'] leco ['rad'] leco2 ['rad'] lecy ['rad'] lega ['prs', 'rad', 'ra2'] leha ['rad'] leha2 ['rad'] leit ['bub'] lemba ['prs', 'rad', 'ras'] leme ['rad'] lerw2 ['bub'] leso ['rad'] leso2 ['rad'] levu ['aqu'] lime ['pwl'] limon ['prs', 'rad', 'ra2'] live ['bub'] lobos ['rad'] lowe ['bub'] luba ['enc', 'prs', 'rad'] madry ['prs', 'ra2', 'rad'] magi ['pwl'] magi2 ['pwl'] mais ['rad'] mal3 ['rad'] mala ['prs', 'ra2', 'rad', 'ra3'] male ['enc', 'prs', 'rad'] malo ['flt'] mane ['rad'] mang ['rad'] mani ['prs', 'ra2', 'rad'] mare ['prs', 'rad'] mars ['rad'] marsh ['rad'] masi ['enc', 'prs', 'rad'] masi2 ['rad'] mata ['enc', 'prs', 'rad'] matel ['rad'] matr ['pwl'] maya ['pwl'] ment ['rad'] mera ['rad'] mhav ['bub'] midx ['bwl'] midx2 ['pwl'] mill ['bub'] mini ['prs'] mins ['prs'] mnza ['bub', 'rad'] moku ['pwl'] momb ['prs', 'ra2', 'rad'] mona ['pwl'] mona2 ['pwl'] monc ['rad'] monc2 ['rad'] mony ['wls'] mony2 ['pwl'] motr ['rad'] mpcw ['rad'] mrms 
['rad'] ms001 ['prs'] mumb ['bub'] murc2 ['rad'] musc ['enc', 'rad', 'prs'] musc3 ['rad'] naga ['rad'] naha ['rad'] nain ['enc'] nauu ['prs'] nawi ['pwl'] newl2 ['bub'] nhav ['bub'] nkfa ['aqu'] noat ['rad'] nome ['bwl'] npor ['bub'] nshi ['bub'] nuk1 ['prs'] nuku ['bub', 'prs', 'ra2', 'rad'] numb2 ['rad'] numbo ['rad'] nuuk ['prs'] ofun ['rad'] ohig ['prs', 'rad'] ohig3 ['prs', 'rad'] olan ['rad'] omae ['rad'] oska ['rad'] oste ['flt'] ouis ['rad'] ouis2 ['rad'] oxel ['rad'] paak ['pwl'] paak2 ['wls'] pada ['prs', 'ra2', 'rad', 'ras'] padn ['prs', 'ra2', 'rad', 'ras'] pagb ['wls'] pago ['pwl'] pagx ['pwl'] pale ['rad'] palm ['rad'] palm1 ['bub', 'prs', 'rad'] pano ['rad'] pant ['rad'] pape ['prs', 'ra2', 'rad'] pape2 ['rad'] pcha ['prs', 'rad'] pdas ['prs', 'ra2', 'rad'] peir ['pr1'] penr ['prs', 'ra2', 'ra3', 'rad'] pich ['prs', 'rad'] pkem ['aqu'] plat2 ['enc', 'ra2', 'rad'] plmy ['atm', 'prs', 'ra2', 'rad'] plom ['rad'] plym ['bub'] pmon ['prs', 'rad'] pmon2 ['prs', 'rad'] pmur ['rad'] pnfl ['wls'] pnfl2 ['pwl'] porl ['aqu'] porp ['bub'] port ['rad'] port2 ['rad'] prev ['rad'] pric ['bub', 'prs', 'rad'] prig ['prs', 'ra2', 'rad', 'ras'] prin2 ['enc'] prud ['wls'] prud2 ['pwl'] prus ['bub'] psla ['wls'] psla2 ['pwl'] pslu ['pwl'] ptal ['prs', 'rad'] ptbl ['prs'] ptln ['rad'] ptln2 ['rad'] ptme ['pwl'] ptmt ['bub'] ptpl ['prs', 'ra2', 'rad'] ptve ['rad'] ptve2 ['rad'] pwil ['prs', 'rad'] qaqo ['prs'] qtro ['prs', 'rad'] quar ['flt'] quepo ['prs', 'ra2', 'rad'] quin ['enc', 'prs', 'ra2', 'rad', 'ras'] quir ['prs', 'rad'] raro ['aqu'] rata ['rad'] rorv ['flt'] rosc ['rad'] rosc2 ['rad'] rose ['bub', 'prs', 'rad'] ross ['aqu'] rous ['rad'] rous2 ['rad'] saba ['enc', 'prs', 'rad'] sado ['rad'] sagu ['rad'] said ['rad'] saig ['rad'] sain ['rad'] sain2 ['rad'] saip ['prs', 'ra2', 'rad'] sala ['prs', 'rad'] salav ['rad'] salv ['enc', 'rad'] salv3 ['enc', 'rad'] sama ['bub', 'prs', 'rad'] sama2 ['rad'] sams ['rad'] san2 ['rad'] sana ['bub', 'prs', 'rad'] sanf ['prs', 
'rad'] sanf3 ['prs', 'rad'] sanj ['wls'] sanj2 ['pwl'] scoa ['rad'] scoa2 ['rad'] scor ['prs'] sdpt ['wls'] sete ['rad'] sete2 ['rad'] sewa ['wls'] sewa2 ['pwl'] shee ['bub'] sile ['rad'] simp ['rad'] simr ['rad'] sino ['rad'] sitk ['wls'] sitt ['prs', 'rad', 'ra2'] sjuan ['rad'] skagk ['rad'] smog ['rad'] smth ['rad'] sobr ['rad'] sole ['rad'] sole2 ['rad'] solo ['aqu'] spik ['rad'] sprg ['aqu'] stari ['rad'] stcr ['pwl'] stcr2 ['pwl'] sthl2 ['ra2', 'rad', 'atm'] sthm ['rad'] stjo ['enc'] stma ['rad'] stma2 ['rad'] stor ['bub'] stpa ['rad'] stqy ['rad'] stqy2 ['rad'] subi ['prs', 'ra2', 'rad'] syow ['prs'] syro ['pr1'] tala ['prs', 'rad'] talc ['prs', 'rad'] tara ['aqu'] tara1 ['atm', 'rad'] tarr ['rad'] tasu ['rad'] ters ['flt'] thes ['rad'] thev ['prs'] tkao ['rad'] tkdi ['prs', 'rad'] tkee ['rad'] tosa ['rad'] toul ['rad'] toul2 ['rad'] toya ['rad'] trab ['rad'] treg ['flt'] trst ['aqu'] tst1 ['pwl'] tst2 ['pwl'] tubua ['prs', 'rad'] tudy ['rad'] ushu ['ra2', 'rad'] usti ['prs'] vald2 ['pwl'] vale ['rad'] valp ['prs', 'rad'] valp2 ['prs', 'rad'] vanu ['aqu'] vard ['rad'] vern ['atm', 'pr1', 'pr2', 'rad'] vhbc ['pwl'] vieq ['pwl'] vieq2 ['pwl'] vil2 ['rad'] visb ['rad'] vish ['prs'] viti ['aqu'] vung ['enc', 'prs', 'ra2', 'ra3', 'rad'] waka ['rad'] wake ['wls'] wake2 ['pwl'] warn ['flt'] whit ['bub'] wick ['bub'] wlgt ['prs'] wlms ['wls'] wlms2 ['pwl'] wood ['wls'] work ['bub'] wpwa ['pwl'] xmas ['prs', 'ra2', 'ra3', 'rad'] yabu ['wls'] yaku ['pwl'] zanz ['enc', 'prs', 'rad'] zkth ['rad'] zygi1 ['rad']
'Stations with data: 601'
# Histogram of how many sensors each station carries, then keep only the
# stations that actually returned data and persist the filtered catalogue.
ioc_cleanup_2024.n_sensors.hvplot.hist(bins=[-0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5])
has_data = ioc_cleanup_2024.ioc_code.isin(keep_stations)
ioc_cleanup_2024_with_data = ioc_cleanup_2024.loc[has_data]
ioc_cleanup_2024_with_data.to_csv("ioc_cleanup_2024.csv")
Store the recordings in separate files, one per station/sensor pair.
# Split each station's multi-sensor parquet into one file per sensor so the
# availability/quality checks below can treat every recording independently.
os.makedirs("raw", exist_ok=True)  # to_parquet fails if the directory is missing
for _, station in ioc_cleanup_2024_with_data.iterrows():
    df = pd.read_parquet(f"data/{station.ioc_code}.parquet")
    # Drop auxiliary channels (switches / battery) that are not sea-level sensors.
    df = df.drop(columns=[col for col in ["sw1", "sw2", "bat"] if col in df.columns])
    for sensor in df.columns:
        ts = df[[sensor]]
        ts.to_parquet(f"raw/{station.ioc_code}_{sensor}.parquet")
Evaluate data availability.
# Collect the "<station>_<sensor>" identifiers from the per-sensor parquet
# files.  pathlib stems replace the original regex approach, which would
# raise AttributeError from re.search(...).group(1) on any non-matching
# path and hard-coded the "/" separator (non-portable).
from pathlib import Path

stations_sensors = [p.stem for p in Path("raw").glob("*.parquet")]
f"Total individual recordings: {len(stations_sensors)}"
'Total individual recordings: 826'
import typing as T
# Detiding window: completeness below is measured against an ideal, fully
# regular record spanning [2022-01-01, 2025-01-01).
DETIDE_START = pd.Timestamp(2022,1,1)
DETIDE_END = pd.Timestamp(2025,1,1)
def calc_ratio(sr: pd.Series, period: pd.DatetimeIndex) -> float:
    """Return the fraction of expected samples in *period* present in *sr*.

    Parameters
    ----------
    sr : pd.Series or pd.DataFrame with a DatetimeIndex
        The observed record; only its index and length are used.
    period : pd.DatetimeIndex
        The ideal, regularly spaced sampling times; must be non-empty for a
        meaningful ratio (NaN is returned for an empty period instead of
        raising ZeroDivisionError).

    Returns
    -------
    float
        ``len(samples inside period) / len(period)``.
        NOTE(review): duplicate timestamps, or a record sampled finer than
        the period's frequency, push this above 1.0 (the summary stats show
        a max of ~1.45) — confirm whether the index should be deduplicated
        upstream.
    """
    if len(period) == 0:
        return float("nan")
    sr = sr[(period[0] <= sr.index) & (sr.index <= period[-1])]
    return len(sr) / len(period)
# Build a per-(station, sensor) availability table: station position plus
# completeness of the record over the detide window.
table = dict()
for station_sensor in sorted(stations_sensors):
    station, sensor = station_sensor.split('_')
    df = pd.read_parquet(f"raw/{station_sensor}.parquet")
    # The most frequent time step is taken as the record's nominal sampling
    # interval: value_counts() sorts by occurrence count, most common first.
    # (The unused occurrence count from the original code was dropped.)
    interval_value_counts = df.index.to_series().diff().value_counts()
    main_interval = T.cast(pd.Timedelta, interval_value_counts.index[0])
    # Ideal regular record over the detide window at the nominal interval;
    # inclusive="left" keeps DETIDE_END itself out of the expected samples.
    detide_period = pd.date_range(DETIDE_START, DETIDE_END, freq=main_interval, inclusive="left")
    item = ioc_cleanup_2024_with_data[ioc_cleanup_2024_with_data.ioc_code == station]
    table[station_sensor] = {
        "lon": item.lon.values[0],
        "lat": item.lat.values[0],
        "completeness": calc_ratio(df, detide_period),
    }
# redo per sensor
# Rows are "<station>_<sensor>" identifiers, columns are lon/lat/completeness.
stations_sensors_availability = pd.DataFrame.from_dict(table, orient="index")
stations_sensors_availability.describe()
stations_sensors_availability.completeness.hvplot.hist()
lon | lat | completeness | |
---|---|---|---|
count | 826.000000 | 826.000000 | 826.000000 |
mean | -5.504539 | 19.513416 | 0.795058 |
std | 89.155516 | 30.031451 | 0.275727 |
min | -177.708000 | -69.007778 | 0.000003 |
25% | -71.627873 | -0.950000 | 0.718041 |
50% | -0.280155 | 24.306100 | 0.931198 |
75% | 39.650000 | 42.635556 | 0.981895 |
max | 179.194900 | 70.980000 | 1.448770 |
# Map of per-sensor data completeness: one point per (station, sensor)
# recording, coloured by the fraction of the detide window it covers.
# NOTE(review): `k2` is defined in a cell outside this view — presumably a
# base overlay (e.g. the world-oceans polygons); confirm before reuse.
stations_sensors_availability.hvplot.points(
x= "lon", y='lat',
hover_cols = ['index',"completeness" ],
color = "completeness",
geo=True,
s = 200
).opts(
height = 800,
width = 1600,
cmap='colorwheel'
) * k2