This notebook contains the methodology for:

  1. choosing the best IOC candidates for storm surge validation purposes

A follow-up edit of this notebook will cover:

  1. the data availability
  2. the data quality of IOC candidates
In [1]:
import geopandas as gp
import pandas as pd
import searvey
from datetime import datetime
import numpy as np
import sklearn.neighbors
import xarray as xr
import hvplot.pandas
import os
In [2]:
world_oceans = gp.read_file("https://gist.github.com/tomsail/2fa52d9667312b586e7d3baee123b57b/raw/23929561eaa8aa76376580a7df300c4e3eb2e509/world_maritime_sectors.json")
In [3]:
IOC_CLEANUP = "ioc_cleanup_2023.csv"
ioc_cleanup = pd.read_csv(IOC_CLEANUP, index_col=0).rename(columns={"longitude": 'lon', "latitude": 'lat', "Station_Name":"location","Country":"country"})
(   world_oceans.hvplot(c='ocean',geo=True).opts(cmap='tab20c') * 
    ioc_cleanup.hvplot.points(x="lon",y="lat",c='k', s= 40,geo=True,coastline=True)
 ).opts(height=450)
Out[3]:

Our experience with skill-panel-demo led us to the following conclusion:

The previous set of stations from ioc_cleanup is not sufficient. We now need to have:

  • more stations to compare with models
  • more time coverage

We want as many stations as possible coinciding with STOFS2D output locations, as we want to compare IOC observations vs our model vs the STOFS2D output.

In [4]:
# The latest STOFS2D output locations (for STOFS2D version 2.1.0) are:
def get_stofs():
    mycols = [str(i) for i in range(6)]  # read up to 6 columns from that file
    stof2d = pd.read_csv(
        "https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
        names=mycols,
        sep="\t+|!",
        header=None,
        skiprows=1,
        engine="python",  # the regex separator requires the python parser engine
    )
    stof2d['Info'] = stof2d.apply(lambda row: ' '.join(filter(None, row[2:])), axis=1)
    stof2d['ID'] = stof2d['Info'].apply(lambda x: ' '.join(x.split()[:3]))
    stof2d['Info'] = stof2d.apply(lambda row: row['Info'].replace(row['ID'], '').strip(), axis=1)
    stof2d = stof2d.drop(columns=["2", "3", "4", "5"])
    stof2d.rename(columns={"0": 'lon', "1": 'lat'}, inplace=True)
    return stof2d

stofs = get_stofs()
stofs.hvplot.points(geo=True,coastline=True).opts(height=450)
Out[4]:

A caveat is that the 1D output files evolve over time:

In [5]:
stofs1 = xr.open_dataset("stofs2d/20220912_stofs_2d_glo.t12z.points.swl.nc")
stofs2 = xr.open_dataset("stofs2d/20231010_stofs_2d_glo.t00z.points.swl.nc")
stofs3 = xr.open_dataset("stofs2d/20241229_stofs_2d_glo.t00z.points.swl.nc")
#SEE APPENDIX FOR DOWNLOADING STOFS2D DATA

stofs_2022 = stofs[stofs.ID.isin([' '.join(s.decode("utf-8").strip().split()[:3]) for s in stofs1.station_name.values])];len(stofs_2022)
stofs_2023 = stofs[stofs.ID.isin([' '.join(s.decode("utf-8").strip().split()[:3]) for s in stofs2.station_name.values])];len(stofs_2023)
stofs_2024 = stofs[stofs.ID.isin([' '.join(s.decode("utf-8").strip().split()[:3]) for s in stofs3.station_name.values])];len(stofs_2024)
Out[5]:
562
Out[5]:
1687
Out[5]:
1688

Luckily, the new stations were appended at the end of the file, so concatenating data across all the files is straightforward (see the check and the sketch below).

In [6]:
stofs_2022[:557].equals(stofs_2023[:557])
stofs_2022[:557].equals(stofs_2024[:557])
Out[6]:
True
Out[6]:
True
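
Below is a minimal sketch (not something run in this notebook) of how the append-only station list can be exploited when stitching the daily point files together: the stations shared by every file version sit at the same leading positions, so they can be selected and concatenated directly. The dimension names "station" and "time" are assumptions about the point-output schema and should be checked against the actual files.

import glob
import xarray as xr

def concat_common_stations(paths, n_common=557):
    # keep only the stations present in every file version; because the station
    # list is append-only, these are the first `n_common` entries of each file
    pieces = [xr.open_dataset(p).isel(station=slice(0, n_common)) for p in sorted(paths)]
    return xr.concat(pieces, dim="time")

# usage, globbing over the local folder used above:
# swl = concat_common_stations(glob.glob("stofs2d/*_stofs_2d_glo.*.points.swl.nc"))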

We need to compare the modelled storm surge with observations. We use IOC tide gauge stations.

In [7]:
def get_meta() -> gp.GeoDataFrame:
    meta_web = searvey.get_ioc_stations().drop(columns=["lon", "lat"])
    meta_api = (
        pd.read_json(
            "http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
        )
        .drop_duplicates()
        .drop(columns=["lon", "lat"])
        .rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})
    )
    merged = pd.merge(
        meta_web,
        meta_api[["ioc_code", "lon", "lat"]].drop_duplicates(),
        on=["ioc_code"],
    )
    return merged.drop(columns=["geometry"])
ioc_ = get_meta()

We have already established a database of clean IOC data for 2022-2023 (see the first plot); we'll use it as a reference:

In [8]:
stofs_plot = stofs_2022.hvplot.scatter(x= "lon", y="lat", hover_cols = "ID", s=130, c='lightgrey', label = 'STOFS 2022 output stations')
stofs_plot1 = stofs_2023.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=150, c='grey', label = 'STOFS 2023 output stations')
stofs_plot2 = stofs_2024.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=200, c='k', label = 'STOFS 2024 output stations')
ioc_plot = ioc_.hvplot.scatter(x="lon", y="lat",hover_cols = "ioc_code", s= 30 , c = 'y', label = 'all IOC stations')
ioc_cleanup_plot = ioc_cleanup.hvplot.scatter(x="lon", y="lat", s=80, c='r', label="stations cleaned for 2022-2023")

(stofs_plot2 * stofs_plot1 * stofs_plot * ioc_cleanup_plot* ioc_plot).opts(width = 1300, height = 600)
Out[8]:

We graphically identified all the stations that are not already in ioc_cleanup and that coincide with STOFS2D output locations.

In [48]:
station_to_add = [
    "juan", "sanf", "anto", "ptmo", "valp", "ferg", "ambon", "bitu", "saum", "sho2", "ushu", 
    "espr", "gamb", "riki", "prud", "vald", "cord", "paak", "dsea", "ketc", "june", "skag", "sewa", "anch", "niki", "seld", "kodi", "alak", 
    "dshu", "dkod", "nome", "adak", "niko", "dchu", "midx", "fren", "sthl", "ascen", "jask", "chab", "kara", "musc", 
    "masi", "mais", "kerg", "syow", "ver1", "vern", "wait", "stpa", "sala", "tara", "marsh", "kwaj", "wake", "fong", 
    "solo", "vanu", "numbo", "numb2", "levu", "wlgt", "jack", "hako", "abas", "ofun", "mera", "toya", "nawi", "brpt", "heeia", 
    "moku", "mane", "john", "plmy", "xmas", "penr", "hiva", "pape", "raro", "pago", "pagx", "east", "garc", "Male2", "ganm", "male", "hani", 
    "mini", "coch", "vish", "chtt", "sitt", "moul", "ptbl", "komi", "kota", "lank", "ms001", "sab2", "saba", "vung", "quin", 
    "quar", "curri", "subi", "mani", "luba", "lega", "tkao", "tkee", "chij", "mins", "saip", "mala", "chuu", "kapi", "deke", "naur", "nauu", 
    "dumo", "espe", "porl", "hill", "waik", "lemba", "beno", "prgi", "prig", "cili", "cila", "tjls", "chrs", "ffcj", "cocb", "telu", "sibo", 
    "sib2", "tanjo", "bupo", "padn", "pada", "fpga", "winc", "wbnc", "oinc", "kpva", "leva", "simd", "wsdc", "cbmd", "ocmd", "cmnj", "phap", 
    "mhpa", "btny", "shnj", "mony", "ptme", "cwme", "epme", "hali", "nain", "nuk1", "nuuk", "qaqo", "reyk", "scor", "rptx", "cctx", "pitx", 
    "pric", "ftfr", "rose", "barb", "stcr", "lame", "isab", "vieq", "yobu", "yabu", "faja", "sanj", "arac", "maya", "magi", "penu", "mona", 
    "ptpr", "ptpl", "sama", "bull", "elpo", "limon", "quepo", "sana", "acaj", "acap", "acya", "manz", "mnza", "cabo", "fort", "call", "lobos", 
    "tala", "lali", "vkfl", "nafl", "fmfl", "spfl", "pnfl", "pbfl", "apfl", "tpfl", "fbfl", "moal", "wlms", "psla", "gila", "pfla", "ncla", 
    "apla", "eila", "cpla", "sptx", "gptx", "fptx", "bres", "sthm", "casc", "gibr", "ceut", "mars", "TR22", "gvd9", "alex", "palm", "pdas", 
    "plus", "dakar", "tako", "tkdi", "lagos", "pntn", "sitin", "walvi", "prte", "durb", "pemba", "mtwa", "momb", "lamu", "pmon", "aric", "mata", 
    "plat", "salv", "blueb", 
    # extra for Europe
    "delf", "bork", "harl", "ters", "denh", "hoek", "kiel", "warn", "euro", "mpcw", "dunk", "boul", "diep", "leha", "ouis", "rosc", "stma", "jers", "leco", "audi", "tudy", 
    "lecy", "conc", "sain", "leso", "larp", "iaix", "port", "arca", "scoa", "bil3", "san2", "gij2", "vil2", "setu", "arri", "sagr", "albu", "huel",
    "bon2", "cadi", "mal3", "motr", "alme", "carb", "murc2", "carg", "alac", "gand", "vale", "sagu", "tarr", "barc", "tst", "ptve", "ptln", "sete", "fosm",
    "toul", "figu", "monc", "cent", "rous", "ajac", "sole", "GE25", "LA38", "LI11", "MC41", "PT17", "CF06", "CA02", "MRTM", "PA07", "usti", "matel",
    "ME13", "RC09", "ST44", "GI20", "PDCR", "PLBR", "SC43", "PE09", "PE21", "PRTP", "CT03", "pant", "PSCA", "ppcp", "CI20", "AZ42", "PO40", "SA16", "GA37", 
    "NA23", "PL14", "CETR", "RCCL", "CR08", "lcst", "TA18", "tara1", "OT15", "MNPL", "BA05", "BRLT", "VI12", "IT45", "OR24", "SB36", "AN15", "RA10", "VE19", 
    "baka", "stari", "vela", "sobr", "corf", "prev", "zkth", "kata", "kypa", "kala", "koro", "kaps", "kast", "pale", "hrak", "iera", "kaso", "aigi", "pano", 
    "peir", "noat", "syro", "myko", "delo", "thes", "smth", "gokc", "bozc", "plom", "ment", "bodr", "kos", "plim", "kalt", "mrms", "feth", "bozy", "tasu", 
    "erdem", "arsu", "iske", "girn", "papho", "leme", "zygi", "larn", "para", "gazi", "batr", "haif", "hade", "ashd", "askl", "psail", "matr", "mang", "csta", 
    "sino", "kaci", "sams", "trab", "elja", "said"
]

Some station codes exist in several variants (e.g. "acap" and "acap2"), so we match by substring:

In [49]:
possible_stations = []
all_ioc = ioc_.ioc_code.values
for stat in station_to_add:
    # keep every IOC code that contains the candidate code, e.g. "acap" also matches "acap2"
    possible_stations.extend(station for station in all_ioc if stat in station)
ioc_to_add = ioc_[ioc_.ioc_code.isin(possible_stations)]
ioc_to_add
Out[49]:
ioc_code gloss_id country location connection contacts added_to_system observations_arrived_per_week observations_expected_per_week observations_ratio_per_week ... sample_interval average_delay_per_day transmit_interval dcp_id last_observation_level last_observation_time delay interval lon lat
0 abas 327 Japan Abashiri SWJP40 Japan Meteorological Agency ( Japan ) 2012-03-21 09:54:59 10040 10080.0 100 ... 1' 8' 10' ABASHIRI 1.66 07:59 28' 10' 144.290000 44.020000
3 acaj 182 El Salvador Acajutla SV SZXX01 Ministerio de Medio Ambiente y Recursos Natura... 2008-06-20 12:17:00 9750 10080.0 97 ... 1' 7' 5' 300434064008810 0.56 08:09 18' 5' -89.838128 13.573792
4 acap 267 Mexico Acapulco MX SEPA40 Centro de Investigación Científica y de Educac... 2008-04-28 12:36:00 -down- 10080.0 0 ... 1' NaN 5' 3540E15A 8.26 -down- 2799d 5' -99.916600 16.833300
5 acap2 267 Mexico Acapulco API SOMX10 Universidad Nacional Autónoma de México ( Mexi... 2014-05-19 10:50:47 10020 10080.0 99 ... 1' 9' 10' 0100D7CA 4.37 08:06 21' 10' -99.903000 16.837933
9 acya 267 Mexico Acapulco Club de Yates ftp Universidad Nacional Autónoma de México ( Mexi... 2010-08-10 09:24:41 NaN 10080.0 0 ... 1' NaN 10' NaN 1.31 2025-03-05 14:59 17h 10' -99.902980 16.837990
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1631 yabu <NA> USA Yabucoa Harbor, PR SXXX03 Puerto Rico Seismic Network ( USA ) +National ... 2010-03-26 14:06:00 9828 10080.0 98 ... 1' 7' 6' 3366B5CA -6.27 08:08 19' 6' -65.833000 18.055100
1636 yobu <NA> Puerto Rico Yobucou PR SXXX03 NaN 2006-06-07 04:30:00 NaN NaN 0 ... ' NaN 6' 3366B5CA NaN NaN NaN 6' -65.833000 18.050100
1643 zkth <NA> Greece Zakynthos, Ionian Islands bgan National Observatory of Athens ( Greece ) 2023-09-29 06:31:39 10032 10080.0 100 ... 1' 1' 1' GR-ZKTH-00 0.01 08:25 2' 1' 20.905200 37.781420
1646 zygi <NA> Cyprus Zygi ftp Cyprus Oceanography Center ( Cyprus ) 2011-09-07 14:55:04 -down- 20160.0 0 ... 0.5' NaN 1' NaN 1.91 -down- 3548d 1' 33.338375 34.727083
1647 zygi1 <NA> Cyprus Zygi bgan Cyprus Marine and Maritime Institute ( Cyprus ... 2018-03-16 05:24:01 NaN 10080.0 0 ... 1' NaN 1' ZYGI1 NaN 2022-03-18 00:47 1084d 1' 33.340228 34.726315

550 rows × 24 columns

In [45]:
stofs_plot =  stofs_2022.hvplot.scatter(x= "lon", y="lat", hover_cols = "ID", s=130, c='lightgrey', label = 'STOFS 2022 output stations')
stofs_plot1 = stofs_2023.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=150, c='grey', label = 'STOFS 2023 output stations')
stofs_plot2 = stofs_2024.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=200, c='k', label = 'STOFS 2024 output stations')
ioc_cleanup_plot = ioc_cleanup.hvplot.scatter(x="lon", y="lat",hover_cols = "ioc_code",s = 90, c='r',label = 'stations already cleaned for 2022-2023')
ioc_to_add_plot = ioc_to_add.hvplot.scatter(x="lon", y="lat", hover_cols="ioc_code", s=90, c='g', label='stations to be added')

(stofs_plot2 * stofs_plot1 * stofs_plot * ioc_to_add_plot * ioc_cleanup_plot).opts(width = 1400, height = 600)
Out[45]:

The 2024 IOC cleanup database is the union of the red and green points:

In [61]:
ioc_cleanup_2024 = pd.concat([ioc_cleanup,ioc_to_add])
ioc_cleanup_2024
Out[61]:
location ioc_code gloss_id lat lon country connection contacts dcp_id last_observation_level ... number_of_years time_zone_hours datum_information instrument precision null_value gauge_type overall_record_quality gesla3_id seaset_id
125 Base O'Higgins ohig <NA> -63.321000 -57.901000 Antarctica SXCH40 Servicio Hidrográfico y Oceanográfico de la Ar... ADC04BE6 1.75 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 125.0
135 Puerto Deseado dese 190.0 -47.754000 -65.915000 Argentina SEPO40 Armada Argentina Servicio de Hidrografía Naval... 33912088 3.69 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 135.0
136 Puerto Madryn madry 191.0 -42.763000 -65.031000 Argentina SEPO40 Armada Argentina Servicio de Hidrografía Naval... 335665D2 6.60 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 136.0
139 Battery Point bapj <NA> -42.892000 147.338000 Australia SZAU01 National Tidal Centre/Australian Bureau of Met... 61221 0.91 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 139.0
140 Broome brom 40.0 -18.001000 122.219000 Australia SZAU01 National Tidal Centre/Australian Bureau of Met... 62650 7.69 ... 32.0 0.0 Unspecified Unspecified Unspecified -99.9999 Coastal No obvious issues Broome 140.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1631 Yabucoa Harbor, PR yabu <NA> 18.055100 -65.833000 USA SXXX03 Puerto Rico Seismic Network ( USA ) +National ... 3366B5CA -6.27 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1636 Yobucou PR yobu <NA> 18.050100 -65.833000 Puerto Rico SXXX03 NaN 3366B5CA NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1643 Zakynthos, Ionian Islands zkth <NA> 37.781420 20.905200 Greece bgan National Observatory of Athens ( Greece ) GR-ZKTH-00 0.01 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1646 Zygi zygi <NA> 34.727083 33.338375 Cyprus ftp Cyprus Oceanography Center ( Cyprus ) NaN 1.91 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1647 Zygi zygi1 <NA> 34.726315 33.340228 Cyprus bgan Cyprus Marine and Maritime Institute ( Cyprus ... ZYGI1 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

719 rows × 77 columns

In [18]:
def find_nearest_nodes(
    mesh_nodes: pd.DataFrame,
    points: pd.DataFrame,
    metric: str = "haversine",
    earth_radius = 6371000,
    ):
    """
    Calculate the mesh nodes that are nearest to the specified `points`.
    Both `mesh_nodes` and `points` must be `pandas.DataFrames` that have
    columns named `lon` and `lat` and the coords must be in EPSG:4326.
    Returns the `points` DataFrame after adding these extra columns:
    - `mesh_index` which is the index of the nearest node in `mesh_nodes`
    - `mesh_lon` which is the longitude of the nearest mesh node
    - `mesh_lat` which is the latitude of the nearest mesh node
    - `distance` which is the distance in meters between the point and the nearest mesh node
    Examples:
        >>> mesh_nodes = pd.DataFrame({
        ...     "lon": [0, 10, 20],
        ...     "lat": [0, 5, 0],
        ... })
        >>> points = pd.DataFrame({
        ...     "lon": [1, 11, 21],
        ...     "lat": [1, 4, 1],
        ...     "id": ["a", "b", "c"],
        ... })
        >>> nearest_nodes = find_nearest_nodes(mesh_nodes, points)
        >>> nearest_nodes
           lon  lat id  mesh_index  mesh_lon  mesh_lat       distance
        0    1    1  a           0         0         0  157249.381272
        1   11    4  b           1        10         5  157010.162641
        2   21    1  c           2        20         0  157249.381272
    """
    # The only requirement is that both `mesh_nodes` and `points` have `lon/lat` columns
    tree = sklearn.neighbors.BallTree(
        np.radians(mesh_nodes[["lat", "lon"]]),
        metric=metric,
    )
    distances, indices = tree.query(np.radians(points[["lat", "lon"]].values))
    closest_nodes = (
        mesh_nodes
        .rename(columns={"lon": "mesh_lon", "lat": "mesh_lat"})
        .iloc[indices.flatten()]
        .assign(distance=(distances.flatten() * earth_radius))
        .reset_index(names=["mesh_index"])
    )

    return pd.concat((points.reset_index(drop = True), closest_nodes), axis="columns")

# 2 - get STOFS
nearest_nodes_2022 = find_nearest_nodes(stofs_2022, ioc_cleanup_2024[["lon","lat","ioc_code","location"]])
nearest_nodes_2023 = find_nearest_nodes(stofs_2023, ioc_cleanup_2024[["lon","lat","ioc_code","location"]])
nearest_nodes_2024 = find_nearest_nodes(stofs_2024, ioc_cleanup_2024[["lon","lat","ioc_code","location"]])
nearest_nodes_2022 = nearest_nodes_2022[~nearest_nodes_2022.mesh_index.isna()]
nearest_nodes_2023 = nearest_nodes_2023[~nearest_nodes_2023.mesh_index.isna()]
nearest_nodes_2024 = nearest_nodes_2024[~nearest_nodes_2024.mesh_index.isna()]
keep_nodes_2022 = nearest_nodes_2022[nearest_nodes_2022.distance < 5000]
keep_nodes_2023 = nearest_nodes_2023[nearest_nodes_2023.distance < 5000]
keep_nodes_2024 = nearest_nodes_2024[nearest_nodes_2024.distance < 5000]

keep_nodes_2022.to_csv("keep_nodes_2022.csv")
keep_nodes_2023.to_csv("keep_nodes_2023.csv")
keep_nodes_2024.to_csv("keep_nodes_2024.csv")
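
As an aside, here is a minimal sketch (not part of this notebook's workflow) of how the saved mesh_index column maps back to the STOFS2D station table: mesh_index is the row number in the stofs DataFrame built above, so the matched STOFS2D IDs and coordinates can be recovered and paired with the IOC codes; the ID can then be used to locate the corresponding record in the netCDF point files.

keep = pd.read_csv("keep_nodes_2022.csv", index_col=0)
# mesh_index points back to the row in the `stofs` station table
matched = stofs.loc[keep.mesh_index.astype(int), ["ID", "lon", "lat"]]
matched["ioc_code"] = keep.ioc_code.values
matched.head()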

The red points are all the STOFS2D stations to be extracted:

In [19]:
p2 = stofs_2022.hvplot.scatter(x="lon", y="lat", hover_cols = "ID", s=70, c='grey',line_color="lightgrey", label = 'STOFS 2022 output stations')
ip = ioc_cleanup_2024.hvplot.scatter(x="lon", y="lat", s=10, c='k', label='IOC_CLEANUP 2022-2024')
k2 = keep_nodes_2022.hvplot.scatter(x="lon", y="lat", c='red', s=20, label="STOFS2D stations to be extracted")

# (world_oceans.hvplot(c='ocean',alpha= 0.9).opts(cmap='tab20c') * p2 * ip * k2 ).opts(width = 1100, height = 800)
(world_oceans.hvplot(c='ocean',alpha= 0.9).opts(cmap='tab20c') * p2 * ip ).opts(width = 1500, height = 900)
Out[19]:

download IOC data

In [51]:
for i_s, station in ioc_cleanup_2024.iterrows():
    if os.path.exists(f"data/{station.ioc_code}.parquet"):
        print(station.ioc_code, "done")
    else: 
        print(station.ioc_code, "downloading..")
        df = searvey.fetch_ioc_station(station.ioc_code, "2022-01-01", "2024-12-31")
        df.to_parquet(f"data/{station.ioc_code}.parquet")
ohig done
dese done
madry done
bapj done
brom done
barn done
djve done
darw done
pkem done
pmur done
ross done
sprg done
thev done
trst done
oste done
bele done
bamf done
prin done
stjo done
vhbc done
greg done
ohig3 done
cald done
coqu done
corr done
pich done
ptal done
pcha done
pwil done
qtro done
quir done
talc done
viti done
cher done
dzao done
herb done
mare done
nuku done
stqy done
stqy2 done
tubua done
cuxh done
helg done
horn done
itea done
LA23 done
abur done
fuka done
hmda done
hana done
ishig done
kusm done
kush done
naga done
naha done
omae done
sado done
saig done
tosa done
waka done
kant done
huat done
chst done
ande done
honn done
malo done
rorv done
treg done
vard done
davo done
dapi done
hie2 done
fue2 done
coru done
arko done
fors done
furu done
gokr done
holm done
kalit done
karl done
klag done
kung done
kungr done
land done
olan done
oska done
oxel done
rata done
simp done
simr done
smog done
spik done
visb done
zanz done
nkfa done
amas done
anta done
igne done
sile done
abed done
bang done
crom done
dove done
harw done
heys done
holy done
holy2 done
ilfa done
kinl done
leit done
lerw done
lerw2 done
live done
lowe done
mhav done
mill done
mumb done
nhav done
newl done
newl2 done
npor done
nshi done
plym done
porp done
prus done
ptmt done
shee done
stor done
whit done
wick done
work done
alam done
aren done
asto done
atka done
acnj done
bamd done
bame done
benc done
boma done
bgct done
amal done
chrp done
cwfl done
cres done
dkwa done
datu done
dpnc done
dutc done
elak done
fpnt done
guam done
hilo done
kahu done
kawa done
kwfl done
lajo done
lime done
pagb done
pslu done
sdpt done
sitk done
wpwa done
wood done
yaku done
colo done
dnze done
prin2 done
abas done
acaj done
acap done
acap2 done
acya done
adak done
adak2 done
aigi done
ajac done
ajac2 done
alac1 done
alac2 done
alak done
alak2 done
albu done
alex done
alex1 done
alex2 done
alex3 done
alme done
alme2 done
ambon done
AN15 done
anch done
anch2 done
anto done
anto2 done
apfl done
apfl2 done
apla done
apla2 done
arac done
aracS done
arca done
arca2 done
aric done
aric2 done
arri done
arsu done
ascen done
ashd done
ashd1 done
askl done
audi done
audi2 done
AZ42 done
BA05 done
baka done
barb done
barb2 done
barc done
barc2 done
batr done
beno done
bil3 done
bitu done
blueb done
bodr done
bodri done
bon2 done
bork done
boul done
boul2 done
bozc done
bozy done
bres done
bres2 done
BRLT done
brpt done
btny done
btny2 done
bull done
bupo done
CA02 done
cabo done
cadi done
call done
carb done
carg done
casc done
cbmd done
cbmd2 done
cctx done
cctx2 done
cent done
cent2 done
CETR done
ceut done
ceut1 done
CF06 done
chab done
chij done
chrs done
chtt done
chuu done
CI20 done
cila done
cili done
cmnj done
cmnj2 done
cocb done
coch done
conc done
conc2 done
cord done
cord2 done
corf done
cpla done
cpla2 done
CR08 done
csta done
csta2 done
csta3 done
CT03 done
curri done
cwme done
cwme2 done
dakar done
dchu done
dchu2 done
deke done
delf done
delo done
denh done
diep done
diep2 done
dkod done
dkod2 done
dsea done
dsea2 done
dshu done
dshu2 done
dumo done
dunk done
dunk2 done
durb done
east done
east2 done
eila done
elja1 done
elpo done
epme done
epme2 done
erdem done
espe done
espr done
euro done
faja done
fbfl done
fbfl2 done
ferg done
feth done
ffcj done
figu done
figu2 done
fmfl done
fmfl2 done
fong done
fort done
fort2 done
fosm done
fosm2 done
fpga done
fpga2 done
fptx done
fren done
ftfr done
ftfr2 done
GA37 done
gamb done
gand done
ganm done
garc done
gazi done
GE25 done
GI20 done
gibr done
gibr2 done
gibr3 done
gij2 done
gila done
gila2 done
girn done
gokc done
gptx done
gptx2 done
gvd9 done
hade done
hade2 done
haif done
hako done
hali done
hani done
harl done
heeia done
hill done
hiva done
hoek done
hrak done
htst done
huel done
iaix done
iera done
isab done
iske done
IT45 done
jack done
jask done
jask2 done
jers done
john done
juan done
juan2 done
june done
kaci done
kala done
kalt done
kapi done
kaps done
kara done
kaso done
kast done
kata done
kerg done
kerg2 done
ketc done
kiel done
kodi done
kodi2 done
komi done
koro done
kos1 done
kos2 done
kota done
kpva done
kwaj done
kwaj2 done
kypa done
LA38 done
lagos done
lali done
lame done
lame2 done
lamu done
lank done
larn done
larn2 done
larp done
larp2 done
lcst done
leco done
leco2 done
lecy done
lega done
leha done
leha2 done
lemba done
leme done
leso done
leso2 done
leva done
levu done
LI11 done
limon done
lobos done
luba done
magi done
magi2 done
mais done
mais2 done
mal3 done
mala done
male done
Male2 done
mane done
mang done
mani done
manz done
mars done
marsh done
masi done
masi2 done
mata done
matel done
matr done
maya done
MC41 done
ME13 done
ment done
mera done
mhpa done
midx done
midx2 done
mini done
mins done
MNPL done
mnza done
moal done
moku done
momb done
mona done
mona2 done
monc done
monc2 done
mony done
mony2 done
motr done
moul done
mpcw done
mrms done
MRTM done
ms001 done
mtwa done
murc2 done
musc done
musc3 done
myko done
NA23 done
nafl done
nain done
naur done
nauu done
nawi done
nawi2 done
ncla done
niki done
niko done
noat done
nome done
nome2 done
nuk1 done
numb2 done
numbo done
nuuk done
ocmd done
ofun done
oinc done
OR24 done
OT15 done
ouis done
ouis2 done
PA07 done
paak done
paak2 done
pada done
padn done
pago done
pago2 done
pagx done
pale done
palm done
palm1 done
pano done
pant done
pape done
pape2 done
papho done
para done
pbfl done
pdas done
PDCR done
PE09 done
PE21 done
peir done
pemba done
penr done
penu done
pfla done
pitx done
PL14 done
plat done
plat2 done
PLBR done
plim done
plmy done
plom done
pmon done
pmon2 done
pnfl done
pnfl2 done
pntn done
PO40 done
porl done
port done
port2 done
ppcp done
prev done
prgi done
pric done
prig done
prte done
PRTP done
prud done
prud2 done
PSCA done
psla done
psla2 done
PT17 done
ptbl done
ptln done
ptln2 done
ptme done
ptpl done
ptpr done
ptve done
ptve2 done
qaqo done
quar done
quepo done
quin done
RA10 done
raro done
raro2 done
RC09 done
RCCL done
reyk done
riki done
rosc done
rosc2 done
rose done
rous done
rous2 done
rptx done
SA16 done
sab2 done
saba done
sagr done
sagu done
said done
sain done
sain2 done
saip done
sala done
salav done
salv done
salv3 done
sama done
sama2 done
sams done
san2 done
sana done
sanf done
sanf3 done
sanj done
sanj2 done
saum done
SB36 done
SC43 done
scoa done
scoa2 done
scor done
seld done
sete done
sete2 done
setu done
sewa done
sewa2 done
shnj done
sho2 done
sib2 done
sibo done
simd done
sino done
sitin done
sitt done
sjuan done
skag done
skagk done
smth downloading..
sobr downloading..
sole downloading..
sole2 downloading..
solo done
spfl done
sptx done
ST44 downloading..
IOC-ST44: No data. Creating a dummy dataframe
stari downloading..
stcr done
stcr2 done
sthl done
sthl2 done
sthm done
stma downloading..
stma2 downloading..
stpa done
subi done
syow done
syro downloading..
TA18 downloading..
tako done
tala done
tanjo done
tara done
tara1 done
tarr downloading..
tasu downloading..
telu done
ters downloading..
thes downloading..
tjls done
tkao done
tkdi done
tkee done
toul downloading..
toul2 downloading..
toya done
tpfl done
TR22 done
trab downloading..
tst1 downloading..
tst2 downloading..
IOC-tst2: Dropped duplicates: 15477 rows
IOC-tst2: Dropped duplicates: 15194 rows
IOC-tst2: Dropped duplicates: 3565 rows
IOC-tst2: Dropped duplicates: 26706 rows
IOC-tst2: Dropped duplicates: 13797 rows
IOC-tst2: Dropped duplicates: 31348 rows
IOC-tst2: Dropped duplicates: 20907 rows
IOC-tst2: Dropped duplicates: 11034 rows
tudy downloading..
ushu done
usti downloading..
vald done
vald2 done
vale downloading..
valp done
valp2 done
valp3 done
vanu done
VE19 downloading..
vela downloading..
IOC-vela: No data. Creating a dummy dataframe
ver1 done
vern done
VI12 downloading..
vieq done
vieq2 done
vil2 downloading..
vish done
vkfl done
vung done
waik done
wait done
wake done
wake2 done
walvi done
warn downloading..
wbnc done
winc done
wlgt done
wlms done
wlms2 done
wsdc done
xmas done
yabu done
yobu done
zkth downloading..
zygi downloading..
IOC-zygi: No data. Creating a dummy dataframe
zygi1 downloading..

check data availability

In [52]:
import re 
import glob

ioc_cleanup_2024['n_sensors'] = 0

pattern = r"data/([\w\d]+)\.parquet"
stations = [re.search(pattern, path).group(1) for path in glob.glob("data/*parquet")]
f"Total stations: {len(stations)}"

# get the stations with data
keep_stations = []
for station in sorted(stations):
    df = pd.read_parquet(f"data/{station}.parquet")
    if not df.empty:
        print(station, end="  ")
        keep_stations.append(station)
        # disregard the sw1, sw2 and bat channels when counting sensors
        df = df.drop(columns=[col for col in ["sw1", "sw2", "bat"] if col in df.columns])
        print(list(df.columns))
        ioc_cleanup_2024.loc[ioc_cleanup_2024.ioc_code == station, "n_sensors"] = len(df.columns)
f"Stations with data: {len(keep_stations)}"
Out[52]:
'Total stations: 719'
AN15  ['rad']
AZ42  ['rad']
BA05  ['rad']
BRLT  ['rad']
CA02  ['rad']
CETR  ['pr1', 'pr2']
CF06  ['rad']
CI20  ['rad']
CR08  ['rad']
CT03  ['rad']
GA37  ['rad']
GE25  ['rad']
GI20  ['prs']
IT45  ['rad']
LA23  ['rad']
LA38  ['rad']
LI11  ['rad']
MC41  ['rad']
ME13  ['rad']
MNPL  ['rad']
MRTM  ['pr1', 'pr2']
Male2  ['prs', 'ra2', 'rad']
NA23  ['rad']
OR24  ['rad']
OT15  ['rad']
PA07  ['rad']
PDCR  ['pr1', 'pr2']
PE09  ['prs']
PL14  ['rad']
PLBR  ['pr1']
PO40  ['rad']
PRTP  ['pr1', 'pr2']
PSCA  ['pr1', 'pr2']
PT17  ['rad']
RA10  ['rad']
RC09  ['rad']
RCCL  ['pr1', 'pr2']
SA16  ['rad']
SB36  ['rad']
SC43  ['rad']
TA18  ['rad']
TR22  ['rad']
VE19  ['rad']
VI12  ['rad']
abas  ['rad']
abed  ['bub']
abur  ['rad']
acaj  ['atm', 'prs', 'ra2', 'rad']
acap2  ['rad']
acnj  ['wls']
acya  ['flt']
adak  ['wls']
adak2  ['pwl']
aigi  ['rad']
ajac  ['rad']
ajac2  ['rad']
alac1  ['rad']
alac2  ['rad']
alak  ['pwl']
alak2  ['pwl']
alam  ['pwl']
albu  ['rad']
alex1  ['rad']
alex2  ['rad']
alex3  ['pwl']
alme  ['rad']
alme2  ['rad']
amal  ['pwl']
amas  ['rad']
ambon  ['enc', 'prs', 'rad']
anch  ['pwl']
anch2  ['pwl']
ande  ['flt']
anta  ['rad']
anto  ['prs', 'rad']
anto2  ['prs', 'rad']
apfl  ['pwl']
apfl2  ['pwl']
apla  ['wls']
apla2  ['pwl']
arac  ['prs']
aracS  ['pwl']
arca  ['rad']
arca2  ['rad']
aren  ['pwl']
aric  ['prs', 'rad']
aric2  ['prs', 'rad']
arko  ['rad']
arri  ['rad']
arsu  ['rad']
ascen  ['atm', 'prs', 'ra2', 'rad']
asto  ['pwl']
atka  ['wls']
audi  ['rad']
audi2  ['rad']
baka  ['rad']
bamd  ['pwl']
bame  ['pwl']
bamf  ['enc']
bang  ['bub']
bapj  ['rad']
barb2  ['pwl']
barc  ['rad']
barc2  ['ra2', 'rad', 'atm']
barn  ['aqu']
batr  ['rad']
bele  ['enc', 'rad']
benc  ['pwl']
beno  ['prs', 'rad', 'ras']
bgct  ['pwl']
bil3  ['rad']
bitu  ['prs', 'rad', 'ras']
blueb  ['prs', 'rad']
bodr  ['ecs']
bodri  ['rad']
boma  ['pwl']
bon2  ['rad']
bork  ['flt']
boul  ['rad']
boul2  ['rad']
bozc  ['rad']
bozy  ['rad']
bres  ['rad']
bres2  ['rad']
brom  ['aqu']
brpt  ['prs', 'rad', 'stp', 'ra2']
btny  ['pwl']
btny2  ['pwl']
bull  ['bub', 'prs', 'rad']
bupo  ['rad']
cadi  ['rad']
cald  ['prs', 'rad']
call  ['prs', 'rad', 'enc']
carb  ['rad']
carg  ['rad']
casc  ['rad']
cbmd  ['pwl']
cbmd2  ['pwl']
cent  ['rad']
cent2  ['rad']
ceut  ['rad']
ceut1  ['rad']
cher  ['rad']
chij  ['rad']
chrp  ['pwl']
chrs  ['rad']
chst  ['prs']
chtt  ['enc', 'prs', 'ra2', 'rad']
chuu  ['rad', 'ra2', 'prs']
cili  ['prs', 'ra2', 'ra3', 'rad']
cmnj  ['pwl']
cmnj2  ['pwl']
cocb  ['aqu']
coch  ['prs']
colo  ['prs', 'ra2', 'rad']
conc  ['rad']
conc2  ['rad']
coqu  ['prs', 'rad']
cord  ['pwl']
cord2  ['pwl']
corf  ['rad']
corr  ['prs', 'rad']
coru  ['flt']
cpla  ['wls']
cpla2  ['pwl']
cres  ['pwl']
crom  ['bub']
csta2  ['rad']
csta3  ['prs']
curri  ['enc', 'prs', 'rad']
cuxh  ['flt']
cwfl  ['pwl']
cwme  ['pwl']
cwme2  ['pwl']
dakar  ['prs', 'ra2', 'rad']
dapi  ['prt', 'prte']
darw  ['aqu']
datu  ['prt', 'prte']
davo  ['prs', 'ra2', 'rad', 'ras']
dchu  ['prt', 'prte']
dchu2  ['prt']
deke  ['prs']
delf  ['flt']
denh  ['flt']
dese  ['prs', 'rad', 'enc']
diep  ['rad']
diep2  ['rad']
djve  ['prt', 'prte']
dkod  ['prt', 'prte']
dkod2  ['prt']
dkwa  ['prt', 'prte']
dnze  ['prt', 'prte']
dove  ['bub']
dpnc  ['wls']
dsea  ['prt', 'prte']
dsea2  ['prt', 'prte']
dshu  ['prt']
dumo  ['prs']
dunk  ['rad']
dunk2  ['rad']
dutc  ['wls']
dzao  ['rad']
east  ['prs', 'rad']
east2  ['prs', 'rad']
elak  ['pwl']
elja1  ['pwl']
elpo  ['prs', 'ra2', 'ra3', 'rad']
epme  ['pwl']
epme2  ['pwl']
erdem  ['rad']
espe  ['aqu']
euro  ['flt']
faja  ['pwl']
fbfl  ['pwl']
fbfl2  ['pwl']
ferg  ['aqu']
ffcj  ['rad']
figu  ['rad']
figu2  ['rad']
fmfl  ['pwl']
fmfl2  ['pwl']
fong  ['aqu']
fors  ['rad']
fort  ['enc', 'rad']
fort2  ['enc', 'rad']
fosm  ['rad']
fosm2  ['rad']
fpga  ['wls']
fpga2  ['pwl']
fpnt  ['wls']
fren  ['prs', 'ra2', 'rad']
ftfr  ['rad']
ftfr2  ['rad']
fue2  ['rad']
fuka  ['rad']
furu  ['rad']
gamb  ['prs', 'rad']
gand  ['rad']
ganm  ['prs', 'ra2', 'ra3', 'rad', 'ras']
garc  ['enc', 'prs', 'rad']
gibr2  ['rad']
gibr3  ['atm', 'pr1', 'pr2', 'rad']
gij2  ['rad']
gila  ['wls']
gokc  ['ecs']
gokr  ['rad']
gptx  ['bwl']
greg  ['prs', 'rad']
guam  ['wls']
hade2  ['rad']
hako  ['rad']
hana  ['rad']
hani  ['prs', 'ra2', 'rad']
harl  ['flt']
harw  ['bub']
heeia  ['rad']
helg  ['flt']
herb  ['rad']
heys  ['bub']
hie2  ['rad']
hill  ['aqu']
hilo  ['wls']
hiva  ['pr2', 'prs']
hmda  ['rad']
hoek  ['flt']
holm  ['rad']
holy2  ['bub']
honn  ['flt']
horn  ['flt']
hrak  ['rad']
huat  ['rad']
huel  ['rad']
iaix  ['rad']
iera  ['rad']
igne  ['rad']
ilfa  ['bub']
isab  ['pwl']
ishig  ['rad']
itea  ['rad']
jack  ['rad']
jers  ['bub']
john  ['enc', 'prs', 'rad']
juan  ['prs', 'rad']
juan2  ['prs', 'rad']
kahu  ['pwl']
kala  ['pr1']
kalit  ['rad']
kant  ['enb', 'enc', 'prs', 'rad', 'stp']
kapi  ['enb', 'enc', 'prs', 'rad']
kaps  ['rad']
kara  ['pr1', 'rad']
karl  ['rad']
kaso  ['rad']
kast  ['rad']
kata  ['pr1']
kawa  ['pwl']
kerg  ['prs']
kerg2  ['rad']
kiel  ['flt']
kinl  ['bub']
klag  ['rad']
kodi  ['wls']
kodi2  ['pwl']
koro  ['rad']
kos1  ['rad']
kos2  ['rad']
kung  ['rad']
kungr  ['rad']
kush  ['rad']
kusm  ['rad']
kwaj  ['wls']
kwaj2  ['pwl']
kwfl  ['wls']
lajo  ['bwl']
lali  ['prs', 'rad', 'ras']
lame  ['pwl']
lame2  ['pwl']
lamu  ['enc', 'prs', 'rad']
land  ['rad']
lank  ['prs', 'ra2', 'rad', 'ras']
larn2  ['rad']
larp  ['rad']
larp2  ['rad']
leco  ['rad']
leco2  ['rad']
lecy  ['rad']
lega  ['prs', 'rad', 'ra2']
leha  ['rad']
leha2  ['rad']
leit  ['bub']
lemba  ['prs', 'rad', 'ras']
leme  ['rad']
lerw2  ['bub']
leso  ['rad']
leso2  ['rad']
levu  ['aqu']
lime  ['pwl']
limon  ['prs', 'rad', 'ra2']
live  ['bub']
lobos  ['rad']
lowe  ['bub']
luba  ['enc', 'prs', 'rad']
madry  ['prs', 'ra2', 'rad']
magi  ['pwl']
magi2  ['pwl']
mais  ['rad']
mal3  ['rad']
mala  ['prs', 'ra2', 'rad', 'ra3']
male  ['enc', 'prs', 'rad']
malo  ['flt']
mane  ['rad']
mang  ['rad']
mani  ['prs', 'ra2', 'rad']
mare  ['prs', 'rad']
mars  ['rad']
marsh  ['rad']
masi  ['enc', 'prs', 'rad']
masi2  ['rad']
mata  ['enc', 'prs', 'rad']
matel  ['rad']
matr  ['pwl']
maya  ['pwl']
ment  ['rad']
mera  ['rad']
mhav  ['bub']
midx  ['bwl']
midx2  ['pwl']
mill  ['bub']
mini  ['prs']
mins  ['prs']
mnza  ['bub', 'rad']
moku  ['pwl']
momb  ['prs', 'ra2', 'rad']
mona  ['pwl']
mona2  ['pwl']
monc  ['rad']
monc2  ['rad']
mony  ['wls']
mony2  ['pwl']
motr  ['rad']
mpcw  ['rad']
mrms  ['rad']
ms001  ['prs']
mumb  ['bub']
murc2  ['rad']
musc  ['enc', 'rad', 'prs']
musc3  ['rad']
naga  ['rad']
naha  ['rad']
nain  ['enc']
nauu  ['prs']
nawi  ['pwl']
newl2  ['bub']
nhav  ['bub']
nkfa  ['aqu']
noat  ['rad']
nome  ['bwl']
npor  ['bub']
nshi  ['bub']
nuk1  ['prs']
nuku  ['bub', 'prs', 'ra2', 'rad']
numb2  ['rad']
numbo  ['rad']
nuuk  ['prs']
ofun  ['rad']
ohig  ['prs', 'rad']
ohig3  ['prs', 'rad']
olan  ['rad']
omae  ['rad']
oska  ['rad']
oste  ['flt']
ouis  ['rad']
ouis2  ['rad']
oxel  ['rad']
paak  ['pwl']
paak2  ['wls']
pada  ['prs', 'ra2', 'rad', 'ras']
padn  ['prs', 'ra2', 'rad', 'ras']
pagb  ['wls']
pago  ['pwl']
pagx  ['pwl']
pale  ['rad']
palm  ['rad']
palm1  ['bub', 'prs', 'rad']
pano  ['rad']
pant  ['rad']
pape  ['prs', 'ra2', 'rad']
pape2  ['rad']
pcha  ['prs', 'rad']
pdas  ['prs', 'ra2', 'rad']
peir  ['pr1']
penr  ['prs', 'ra2', 'ra3', 'rad']
pich  ['prs', 'rad']
pkem  ['aqu']
plat2  ['enc', 'ra2', 'rad']
plmy  ['atm', 'prs', 'ra2', 'rad']
plom  ['rad']
plym  ['bub']
pmon  ['prs', 'rad']
pmon2  ['prs', 'rad']
pmur  ['rad']
pnfl  ['wls']
pnfl2  ['pwl']
porl  ['aqu']
porp  ['bub']
port  ['rad']
port2  ['rad']
prev  ['rad']
pric  ['bub', 'prs', 'rad']
prig  ['prs', 'ra2', 'rad', 'ras']
prin2  ['enc']
prud  ['wls']
prud2  ['pwl']
prus  ['bub']
psla  ['wls']
psla2  ['pwl']
pslu  ['pwl']
ptal  ['prs', 'rad']
ptbl  ['prs']
ptln  ['rad']
ptln2  ['rad']
ptme  ['pwl']
ptmt  ['bub']
ptpl  ['prs', 'ra2', 'rad']
ptve  ['rad']
ptve2  ['rad']
pwil  ['prs', 'rad']
qaqo  ['prs']
qtro  ['prs', 'rad']
quar  ['flt']
quepo  ['prs', 'ra2', 'rad']
quin  ['enc', 'prs', 'ra2', 'rad', 'ras']
quir  ['prs', 'rad']
raro  ['aqu']
rata  ['rad']
rorv  ['flt']
rosc  ['rad']
rosc2  ['rad']
rose  ['bub', 'prs', 'rad']
ross  ['aqu']
rous  ['rad']
rous2  ['rad']
saba  ['enc', 'prs', 'rad']
sado  ['rad']
sagu  ['rad']
said  ['rad']
saig  ['rad']
sain  ['rad']
sain2  ['rad']
saip  ['prs', 'ra2', 'rad']
sala  ['prs', 'rad']
salav  ['rad']
salv  ['enc', 'rad']
salv3  ['enc', 'rad']
sama  ['bub', 'prs', 'rad']
sama2  ['rad']
sams  ['rad']
san2  ['rad']
sana  ['bub', 'prs', 'rad']
sanf  ['prs', 'rad']
sanf3  ['prs', 'rad']
sanj  ['wls']
sanj2  ['pwl']
scoa  ['rad']
scoa2  ['rad']
scor  ['prs']
sdpt  ['wls']
sete  ['rad']
sete2  ['rad']
sewa  ['wls']
sewa2  ['pwl']
shee  ['bub']
sile  ['rad']
simp  ['rad']
simr  ['rad']
sino  ['rad']
sitk  ['wls']
sitt  ['prs', 'rad', 'ra2']
sjuan  ['rad']
skagk  ['rad']
smog  ['rad']
smth  ['rad']
sobr  ['rad']
sole  ['rad']
sole2  ['rad']
solo  ['aqu']
spik  ['rad']
sprg  ['aqu']
stari  ['rad']
stcr  ['pwl']
stcr2  ['pwl']
sthl2  ['ra2', 'rad', 'atm']
sthm  ['rad']
stjo  ['enc']
stma  ['rad']
stma2  ['rad']
stor  ['bub']
stpa  ['rad']
stqy  ['rad']
stqy2  ['rad']
subi  ['prs', 'ra2', 'rad']
syow  ['prs']
syro  ['pr1']
tala  ['prs', 'rad']
talc  ['prs', 'rad']
tara  ['aqu']
tara1  ['atm', 'rad']
tarr  ['rad']
tasu  ['rad']
ters  ['flt']
thes  ['rad']
thev  ['prs']
tkao  ['rad']
tkdi  ['prs', 'rad']
tkee  ['rad']
tosa  ['rad']
toul  ['rad']
toul2  ['rad']
toya  ['rad']
trab  ['rad']
treg  ['flt']
trst  ['aqu']
tst1  ['pwl']
tst2  ['pwl']
tubua  ['prs', 'rad']
tudy  ['rad']
ushu  ['ra2', 'rad']
usti  ['prs']
vald2  ['pwl']
vale  ['rad']
valp  ['prs', 'rad']
valp2  ['prs', 'rad']
vanu  ['aqu']
vard  ['rad']
vern  ['atm', 'pr1', 'pr2', 'rad']
vhbc  ['pwl']
vieq  ['pwl']
vieq2  ['pwl']
vil2  ['rad']
visb  ['rad']
vish  ['prs']
viti  ['aqu']
vung  ['enc', 'prs', 'ra2', 'ra3', 'rad']
waka  ['rad']
wake  ['wls']
wake2  ['pwl']
warn  ['flt']
whit  ['bub']
wick  ['bub']
wlgt  ['prs']
wlms  ['wls']
wlms2  ['pwl']
wood  ['wls']
work  ['bub']
wpwa  ['pwl']
xmas  ['prs', 'ra2', 'ra3', 'rad']
yabu  ['wls']
yaku  ['pwl']
zanz  ['enc', 'prs', 'rad']
zkth  ['rad']
zygi1  ['rad']
Out[52]:
'Stations with data: 601'
In [53]:
ioc_cleanup_2024.n_sensors.hvplot.hist(bins=[-0.5,0.5,1.5,2.5,3.5,4.5,5.5])
Out[53]:
In [62]:
ioc_cleanup_2024_with_data = ioc_cleanup_2024[ioc_cleanup_2024.ioc_code.isin(keep_stations)]
ioc_cleanup_2024_with_data.to_csv("ioc_cleanup_2024.csv")

store in separate files

In [56]:
for i_s, s in ioc_cleanup_2024_with_data.iterrows():
    df = pd.read_parquet(f"data/{s.ioc_code}.parquet")
    df = df.drop(columns=[col for col in ["sw1", "sw2", "bat"] if col in df.columns])
    for sensor in df.columns:
        ts = df[[sensor]]
        ts.to_parquet(f"raw/{s.ioc_code}_{sensor}.parquet")

evaluate data availability

In [57]:
pattern = r"raw/([\w\d]+)\.parquet"
stations_sensors = [re.search(pattern, path).group(1) for path in glob.glob("raw/*parquet")]
f"Total individual recordings: {len(stations_sensors)}"
Out[57]:
'Total individual recordings: 826'
In [58]:
import typing as T
DETIDE_START = pd.Timestamp(2022,1,1)
DETIDE_END = pd.Timestamp(2025,1,1)

def calc_ratio(sr: pd.Series, period: pd.DatetimeIndex) -> float:
    sr = sr[(period[0] <= sr.index) & (sr.index <= period[-1])]
    return len(sr) / len(period)

table = dict()
for station_sensor in sorted(stations_sensors):
    station, sensor = station_sensor.split('_')
    df = pd.read_parquet(f"raw/{station_sensor}.parquet")
    interval_value_counts = df.index.to_series().diff().value_counts()
    main_interval_occurences = interval_value_counts.iloc[0]
    main_interval = T.cast(pd.Timedelta, interval_value_counts.index[0])
    detide_period = pd.date_range(DETIDE_START, DETIDE_END, freq=main_interval, inclusive="left")
    table[station_sensor] = dict()
    item = ioc_cleanup_2024_with_data[ioc_cleanup_2024_with_data.ioc_code == station]
    table[station_sensor]["lon"] = item.lon.values[0]
    table[station_sensor]["lat"] = item.lat.values[0]
    table[station_sensor]["completeness"] = calc_ratio(df, detide_period)
    # redo per sensor 

stations_sensors_availability = pd.DataFrame(table).T
In [59]:
stations_sensors_availability.describe()
stations_sensors_availability.completeness.hvplot.hist()
Out[59]:
lon lat completeness
count 826.000000 826.000000 826.000000
mean -5.504539 19.513416 0.795058
std 89.155516 30.031451 0.275727
min -177.708000 -69.007778 0.000003
25% -71.627873 -0.950000 0.718041
50% -0.280155 24.306100 0.931198
75% 39.650000 42.635556 0.981895
max 179.194900 70.980000 1.448770
Out[59]:
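
Note that the maximum completeness above exceeds 1: calc_ratio counts every row of the raw recording inside the detide window, so duplicated timestamps or samples finer than the modal interval inflate the ratio. A minimal variant (an assumption, not what was run here) counting unique timestamps only:

def calc_ratio_unique(sr: pd.Series, period: pd.DatetimeIndex) -> float:
    # clip to the detide window, then count distinct timestamps
    sr = sr[(period[0] <= sr.index) & (sr.index <= period[-1])]
    return sr.index.drop_duplicates().size / len(period)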
In [60]:
stations_sensors_availability.hvplot.points(
    x= "lon", y='lat', 
    hover_cols = ['index',"completeness" ],
    color = "completeness", 
    geo=True,
    s = 200
).opts(
    height = 800,
    width = 1600, 
    cmap='colorwheel'
) * k2
Out[60]:
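
As a possible next step (a sketch only, with an arbitrary 70% cut-off that is not a choice made in this notebook), the per-sensor completeness table can be used to shortlist the recordings worth cleaning:

THRESHOLD = 0.7  # arbitrary value, for illustration
good = stations_sensors_availability[stations_sensors_availability.completeness >= THRESHOLD]
good_stations = sorted({name.split("_")[0] for name in good.index})
f"Recordings above threshold: {len(good)} / {len(stations_sensors_availability)} ({len(good_stations)} stations)"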