In [1]:
import requests
import pandas as pd
import hvplot.pandas
import json
import io
import holoviews as hv
import geoviews as gv
import numpy as np
import geopandas as gp
import searvey
from dateutil.relativedelta import relativedelta

hv.extension('bokeh')
In [2]:
# Define the base URLs for the two APIs
sl_api  = "https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/api/Group/"
tad_api = "https://webcritech.jrc.ec.europa.eu/TAD_server/api/Groups/Get"

api_device = "https://webcritech.jrc.ec.europa.eu/api/Device/"

# Fetch the list of providers
sl_response = requests.get(sl_api)
tad_response = requests.get(tad_api)
sl_data = json.loads(sl_response.text)  # Parse the JSON response
tad_data = json.loads(tad_response.text)  # Parse the JSON response
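Both endpoints return a JSON list of provider/group records; the loops below only rely on each record exposing a `Name` field. A quick sanity check (a sketch, assuming that field is present on every entry):

In [ ]:
# Peek at the provider lists (assumes each entry has a 'Name' key, as used below)
print(f"{len(sl_data)} SeaLevelDb providers, {len(tad_data)} TAD groups")
print([p['Name'] for p in sl_data[:5]])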

Data availability¶

SeaLevelDB stations¶

In [3]:
# Initialize a list to hold all station data
SL_data = []

# Loop through the list of providers and fetch their stations
for provider in sl_data:
    provider_name = provider['Name']
    stations_url = sl_api + provider_name + "/Devices"
    stations_response = requests.get(stations_url)
    stations_data = json.loads(stations_response.text)  # Parse the JSON response
    # Loop through each station and extract data
    for device in stations_data:
        station_data = {
            'Provider': provider_name,
            'Id': device['Id'],
            'Name': device['Name'],
            'lat': device['Lat'],
            'lon': device['Lon'],
            'LastAccessStatus': device['CurrentStatus']['LastAccessStatus'],
            'LastDate': device['CurrentStatus']['LastDate'],
            'State': device['CurrentStatus']['State'],
            'SyncStatus': device['CurrentStatus']['SyncStatus'],
            'FileType': device['FileType'],
            'GroupId': device['GroupId'],
            'MovAvgNp': device['MovAvgNp'],
            'Notes': device.get('Notes'),  # Use .get() to handle missing keys
            'Source': device['Source']
            # Add other fields as necessary
        }
        SL_data.append(station_data)
SL_df = pd.DataFrame(SL_data)
SL_df
SL_df.to_csv('SeaLevelDb_stations.csv')
SL_df.groupby(['Provider','State']).count()[['Id']].hvplot.bar(
    rot = 90,
    logy = True,
    ylim = [0.5, 2000], 
    grid = True
).opts(
    width = 1200,
    height = 800,
    title = "SeaLevelDb data availability"
)
Out[3]:
| | Provider | Id | Name | lat | lon | LastAccessStatus | LastDate | State | SyncStatus | FileType | GroupId | MovAvgNp | Notes | Source |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | BIG | 2896 | Indonesia - Tolitoli (Central Sulawesi) | 1.050620 | 120.800049 | ERR: NO DATA TYPE | 2020-01-23T15:10:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0051TLTL02/... |
| 1 | BIG | 2897 | Indonesia - Mamuju (Western Sulawesi) | -2.666982 | 118.893349 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0009MMJU02/... |
| 2 | BIG | 2898 | Indonesia - Pantolan (Western Sulawesi) | -0.711140 | 119.856155 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0036PTLN02/... |
| 3 | BIG | 2899 | Indonesia - Kota Agung (Lampung) | -5.500444 | 104.619362 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0080KTAG01/... |
| 4 | BIG | 2900 | Indonesia - Krui (Lampung) | -5.183500 | 103.933052 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0100KRUI01/... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1313 | TD UNESCO | 2569 | Ireland - New Ballyglass pier Belmullet | 54.252998 | -9.890000 | OK | 2024-09-19T10:55:00Z | active | ready | GLOSSxml | 6 | 2 | None | http://www.ioc-sealevelmonitoring.org/service.... |
| 1314 | TD UNESCO | 2915 | Philippines - Currimao_Prs | 18.017000 | 120.483002 | OK | 2024-09-18T15:18:00Z | active | ready | GLOSSxml | 6 | 2 | None | http://www.ioc-sealevelmonitoring.org/service.... |
| 1315 | UNIFI | 1872 | Italy - Stromboli Sciara del Fuoco (1min) | 38.798401 | 15.193100 | OK | 2013-01-14T14:28:47Z | active | None | UNIFI | 21 | -1 | None | http://webcritech.jrc.ec.europa.eu/modelling/S... |
| 1316 | UNIFI | 1873 | Italy - Stromboli Sciara del Fuoco (15 sec) | 38.798401 | 15.193100 | OK | 2014-04-26T13:29:45Z | active | None | TAD_T | 21 | -1 | None | http://webcritech.jrc.ec.europa.eu/tad_server/... |
| 1317 | UNIFI | 1874 | Italy - Stromboli Sciara del Fuoco (5 sec) | 38.798401 | 15.193100 | OK | 2013-01-14T14:29:36Z | inactive | None | UNIFI | 21 | -1 | None | http://webcritech.jrc.ec.europa.eu/modelling/S... |

1318 rows × 14 columns

Out[3]:
[bar chart: "SeaLevelDb data availability", station counts per provider split by state, log y-axis]

TAD server stations¶

In [4]:
TAD_data = []
# Loop through the list of providers and fetch their stations
for provider in tad_data:
    provider_name = provider['Name']
    stations_url = f"https://webcritech.jrc.ec.europa.eu/TAD_server/api/Groups/GetGeoJSON?group={provider_name}&maxLatency=10000000"
    stations_response = requests.get(stations_url)
    stations_data = json.loads(stations_response.text)  # Parse the JSON response
    # The endpoint may return a single dict instead of a list
    if isinstance(stations_data, dict):
        stations_data = [stations_data]
    # Loop through each station and extract data
    if len(stations_data) > 0:
        for device in stations_data[0]['features']:
            properties = device.get('properties', {})
            geometry = device.get('geometry', {})
            coordinates = geometry.get('coordinates', [None, None])
            latency = properties.get('Latency', {}).get('Literal')
            last_data_date_str = properties.get('LastData', {}).get('Date')
            last_data_date = pd.to_datetime(last_data_date_str)  # NaT when the date is missing

            # Flag the station as active if the last measurement is within the last week
            state = "active" if pd.notna(last_data_date) and (pd.Timestamp.now() - last_data_date) <= pd.Timedelta(days=7) else "inactive"

            station_data = {
                'Provider': properties.get('Provider'),
                'Id': device.get('id'),
                'Name': properties.get('Name'),
                'lat': coordinates[1],
                'lon': coordinates[0],
                'LastAccessStatus': properties.get('LastData', {}).get('Date'),
                'LastDate': last_data_date,
                'SyncStatus': latency,
                'State': state,
                'FileType': None,  # Update this if there is a corresponding field in the new data
                'GroupId': None,  # Update this if there is a corresponding field in the new data
                'MovAvgNp': None,  # Update this if there is a corresponding field in the new data
                'Notes': properties.get('Notes'),  # Use .get() to handle missing keys
                'Source': None
                # Add other fields as necessary
            }
            TAD_data.append(station_data)
TAD_df = pd.DataFrame(TAD_data)
TAD_df
TAD_df.to_csv('TAD_stations.csv')
TAD_df.groupby(['Provider','State']).count()[['Id']].hvplot.bar(
    rot = 90,
    logy = True,
    ylim = [0.5, 2000], 
    grid = True
).opts(
    width = 1600,
    height = 800,
    title = "SeaLevelDb data availability"
)
Out[4]:
| | Provider | Id | Name | lat | lon | LastAccessStatus | LastDate | SyncStatus | State | FileType | GroupId | MovAvgNp | Notes | Source |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | JRC-ISPRA | 64 | IDSL-01 | 43.878340 | 8.018800 | 30 Jun 2023 13:01:02 | 2023-06-30 13:01:02 | 446 Days | inactive | None | None | None | None | None |
| 1 | JRC-IGN | 79 | IDSL-06 | 36.542140 | -6.280612 | 19 Sep 2024 12:06:10 | 2024-09-19 12:06:10 | 16 Sec. | active | None | None | None | None | None |
| 2 | JRC-IGN | 80 | IDSL-07 | 37.567146 | -0.978958 | 19 Sep 2024 11:58:10 | 2024-09-19 11:58:10 | 8 Min. | active | None | None | None | None | None |
| 3 | JRC-NOA | 86 | IDSL-13 | 37.942535 | 22.933611 | 19 Sep 2024 12:06:20 | 2024-09-19 12:06:20 | 6 Sec. | active | None | None | None | None | None |
| 4 | JRC-KOERI | 87 | IDSL-14 | 39.835741 | 26.075859 | 06 Dec 2023 13:40:02 | 2023-12-06 13:40:02 | 287 Days | inactive | None | None | None | None | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1084 | NOAA | 313 | NOAA-Wilmington | 34.227500 | -77.953611 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1085 | NOAA | 265 | NOAA-Woods_Hole | 41.523613 | -70.671112 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1086 | NOAA | 314 | NOAA-Wrightsville_Beach | 34.213333 | -77.786667 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1087 | NOAA | 447 | NOAA-Yakutat | 59.548333 | -139.733056 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1088 | NOAA | 305 | NOAA-Yorktown_USCG_Training_Center | 37.226500 | -76.478800 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |

1089 rows × 14 columns

Out[4]:
[bar chart: "TAD server data availability", station counts per provider split by state, log y-axis]

Reference stations already in use in Seareport:

In [5]:
def get_stofs2d_meta():
    stofs2d = pd.read_csv(
        "https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
        names=["coords", "name"],
        sep="!",
        header=None,
        skiprows=1
    )
    stofs2d = stofs2d.assign(
        lon=stofs2d.coords.str.split("\t", n=1).str[0].astype(float),
        lat=stofs2d.coords.str.strip().str.rsplit("\t", n=1).str[1].astype(float),
        stofs2d_name=stofs2d.name.str.strip(),
    ).drop(columns=["coords", "name"])
    return stofs2d


def get_ioc_meta() -> gp.GeoDataFrame:
    meta_web = searvey.get_ioc_stations().drop(columns=["lon", "lat"])
    meta_api = (
        pd.read_json(
            "http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
        )
        .drop_duplicates()
        .drop(columns=["lon", "lat"])
        .rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})
    )
    merged = pd.merge(
        meta_web,
        meta_api[["ioc_code", "lon", "lat"]].drop_duplicates(),
        on=["ioc_code"],
    )
    return merged

def get_coops_meta() -> gp.GeoDataFrame: 
    coops = searvey.get_coops_stations()
    coops['lon'] = coops['geometry'].x
    coops['lat'] = coops['geometry'].y
    coops = coops.drop(columns='geometry')
    return coops

def merge_ioc_and_stofs(ioc: pd.DataFrame, stofs2d: pd.DataFrame, coops: pd.DataFrame) -> pd.DataFrame:
    stations = pd.concat((ioc, stofs2d), ignore_index=True)
    stations = stations.assign(unique_id=stations.ioc_code.combine_first(stations.stofs2d_name))
    stations = pd.concat((stations, coops),ignore_index=True)
    stations = stations.assign(unique_id=stations.unique_id.combine_first(stations.nws_id))
    return stations

ioc = get_ioc_meta()
stofs2d = get_stofs2d_meta()
coops = get_coops_meta()
stations = merge_ioc_and_stofs(ioc=ioc, stofs2d=stofs2d, coops = coops.drop(columns="removed"))
stations['is_sat'] = stations.unique_id.str.contains('SA')  # ids containing 'SA' look like satellite points
stations['source'] = "stofs"
stations.loc[~stations.ioc_code.isna(), 'source'] = "ioc"
stations.loc[~stations.nws_id.isna(), 'source'] = "coops"
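The `unique_id` falls back across the identifier columns via `combine_first`, which keeps the first non-null value per row; a minimal illustration of that semantics (values are made up):

In [ ]:
# combine_first keeps the caller's value where present, else the argument's
a = pd.Series(["abed", None, None])
b = pd.Series([None, "8411250", None])
a.combine_first(b)  # -> ['abed', '8411250', None]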

Compare all stations¶

In [6]:
plot1 = stations.drop(columns='geometry')[~stations['is_sat']].hvplot.points(
    x = 'lon',
    y='lat',
    c = 'source',
    cmap = 'fire',
    geo = True,
    s = 20,
    legend = False
)
plot2 = pd.concat([SL_df,TAD_df]).hvplot.points(
    x = 'lon',
    y='lat',
    c = 'Provider',
    line_color = 'k',
    line_width = 0.25,
    cmap = 'glasbey',
    geo = True,
    s=150,
    tiles="OSM",
)

(plot2 * plot1).opts(
    width = 1400,
    height = 1070
)
Out[6]:
[map: WebCritech (SeaLevelDb + TAD) stations colored by provider, overlaid with Seareport reference stations colored by source]

Let's now compare this with data availability.

In [7]:
SL_df['LastDate'] = pd.to_datetime(SL_df['LastDate'], utc=True).dt.tz_localize(None)
TAD_df['LastDate'] = pd.to_datetime(TAD_df['LastDate'], utc=True).dt.tz_localize(None)
SL_df.loc[SL_df['LastDate']>pd.Timestamp.now(), 'LastDate']= pd.Timestamp.now()
SL_df.loc[SL_df['LastDate']<pd.Timestamp(2010,1,1), 'LastDate']= pd.Timestamp(2010,1,1)
SL_df.LastDate.hvplot.hist(bins=100).opts(title = "SeaLevelDb")
TAD_df.LastDate.hvplot.hist().opts(title = "TAD Server")
Out[7]:
[histogram: "SeaLevelDb", distribution of LastDate]
Out[7]:
[histogram: "TAD Server", distribution of LastDate]
In [8]:
SL_df['period_inactive'] = (pd.Timestamp.now() - SL_df['LastDate']).dt.total_seconds() / 3600 / 24 / 365.25  # in years
TAD_df['period_inactive'] = (pd.Timestamp.now() - TAD_df['LastDate']).dt.total_seconds() / 3600 / 24 / 365.25  # in years

all_stations = pd.concat([SL_df,TAD_df])
all_stations.period_inactive.hvplot.hist().opts(title = "years inactive")
Out[8]:
[histogram: "years inactive", distribution across all stations]
In [9]:
all_stations.to_csv('TAD_SLDB_stations.csv')
In [10]:
plot_3 = SL_df.hvplot.points(
    x = 'lon',
    y='lat',
    c = 'period_inactive',
    s = 50,
    geo = True,
    cmap = 'fire_r',
    tiles="OSM",
).opts(height = 550, title = 'years inactive')

((plot2 * plot1).opts(height=550, title = 'Provider') + plot_3)
Out[10]:
[maps: provider overlay (left) and SeaLevelDb years inactive (right)]

Finnish (FMI) and Indonesian (BIG) stations appear to be exclusive to the WebCritech server, although they stopped recording data in January 2020.
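
A rough way to check this exclusivity is to look for a Seareport reference station near each WebCritech station. A coarse sketch, assuming the providers appear as "FMI" and "BIG" in the Provider column and using an arbitrary ~0.1° threshold on raw coordinates (not a proper geodesic match):

In [ ]:
# Count FMI/BIG stations that have a reference station within ~0.1 degrees
for prov in ["FMI", "BIG"]:
    sub = SL_df[SL_df.Provider == prov]
    matched = sum(
        bool((np.hypot(stations.lon - st.lon, stations.lat - st.lat) < 0.1).any())
        for _, st in sub.iterrows()
    )
    print(f"{prov}: {matched}/{len(sub)} stations near a reference station")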

In [11]:
plot_4 = TAD_df.hvplot.points(
    x = 'lon',
    y='lat',
    c = 'period_inactive',
    line_color = "k",
    line_width = 0.25,
    cmap = 'fire_r',
    s = 50,
    geo = True,
    tiles="OSM",
).opts(title = "years inactive",height=550)

((plot2 * plot1).opts(height=550, title = 'Provider') + plot_4)
Out[11]:
[maps: provider overlay (left) and TAD years inactive (right)]

Most TAD stations seem to have stopped recording in January 2024.

Indian (INCOIS) stations appear to be exclusive to the WebCritech server.
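
The January 2024 cutoff can be checked by tallying stations per month of last record (a sketch over the `LastDate` column built above):

In [ ]:
# Month of the last record per TAD station; a spike at 2024-01 backs the claim
pd.to_datetime(TAD_df.LastDate).dt.to_period('M').value_counts().sort_index().tail(12)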

Get detailed station metadata (NOT FINISHED)¶

We'd need to get the "First date" information, which can be retrieved by scraping the website: the device page at https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/Device/1014 (the device number is the Id column) contains it under html.hqs-js > body > div > div.container > section.row > div#dev-last-details.col-md-5 > dl.dl-horizontal > dd in the source HTML.

In [ ]:
from bs4 import BeautifulSoup

# Scrape the "First date" value from a device's detail page
def get_first_date(device_number):
    print('.', end='')  # crude progress indicator
    url = f"https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/Device/{device_number}"
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        # Each dl-horizontal element holds dt/dd pairs of metadata
        dl_elements = soup.find_all('dl', class_='dl-horizontal')
        for dl in dl_elements:
            dt_elements = dl.find_all('dt')
            for index, dt in enumerate(dt_elements):
                if dt.get_text().strip() == 'First date':
                    # The dd at the same index holds the corresponding value
                    first_date_dd = dl.find_all('dd')[index]
                    return first_date_dd.get_text().strip()
    return None

# Apply the function to each row and store the results in a new column
SL_df['FirstDate'] = SL_df['Id'].apply(get_first_date)
In [ ]:
from searvey.multi import multithread

# NOTE: no rate limiting is applied to these requests
def fetch_SeaLevelDb_station(tmin: pd.Timestamp, tmax: pd.Timestamp, id: int, NSTEPS=10000000):
    url = f"https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/api/Device/{id}/Data?tMin={tmin.strftime('%Y-%m-%d')}&tMax={tmax.strftime('%Y-%m-%d')}&nPts={NSTEPS}&field=level&mode=CSV"
    stations_response = requests.get(url)
    if stations_response.ok:
        data = io.StringIO(stations_response.text)  # mode=CSV returns CSV, so parse it as such
        df = pd.read_csv(data)
        return df
    else:
        print(f"Error: {stations_response.status_code}")
        return pd.DataFrame()


def fetch_TAD_station(tmin: pd.Timestamp, tmax: pd.Timestamp, id: int, NSTEPS = 10000000): 
    url = f"https://webcritech.jrc.ec.europa.eu/TAD_server/api/Data/Get/{id}?tMin={tmin.strftime('%Y-%m-%d')}&tMax={tmax.strftime('%Y-%m-%d')}&nRec={NSTEPS}&mode=CSV"
    stations_response = requests.get(url)
    if stations_response.ok:
        data = io.StringIO(stations_response.text)  # Create a text stream object
        df = pd.read_csv(data)  # Read the DataFrame from the text stream
        return df
    else:
        print(f"Error: {stations_response.status_code}")
        return pd.DataFrame()


def get_TAD_stations(df: pd.DataFrame, device_var: str):
    # NOT FINISHED: currently just inspects the first station
    for s, station in df.iterrows():
        print(s, station)
        break

def get_SLDB_stations(df: pd.DataFrame, device_var: str):
    for s, station in df.iloc[::-1].iterrows():
        print(station)
        # Build one request per month from 2020-01-01 up to the station's last record
        func_kwargs = []
        for tstep in pd.date_range(pd.Timestamp(2020, 1, 1), station.LastDate, freq='MS'):
            tmin = tstep
            tmax = tstep + relativedelta(months=+1)
            func_kwargs.append(
                dict(
                    tmin=tmin,
                    tmax=tmax,
                    id=station[device_var],
                ),
            )
        results = multithread(
            func=fetch_SeaLevelDb_station,
            func_kwargs=func_kwargs,
            n_workers=10,
            print_exceptions=False,
            disable_progress_bar=False,
        )
        for result in results:
            if result.result is not None:
                df = result.result
                print(df)
        break  # NOT FINISHED: stop after the first station
get_SLDB_stations(SL_df, "Id")
In [27]:
Device = 94
url = f"https://webcritech.jrc.ec.europa.eu/TAD_server/api/Data/Get/{Device}?tMin=2024-08-01&tMax=2024-09-01&nRec={10000000}&mode=CSV"
In [28]:
stations_response = requests.get(url)
data = io.StringIO(stations_response.text)  # Create a text stream object
df = pd.read_csv(data)  # Read the DataFrame from the text stream
df
Out[28]:
| | Time(UTC) | Lev RAD (m) | Panel (V) | Rms (m) | Sensor Temp (C) | Temperature (C) | Alert | Alert Signal (m) | Battery (V) | Forecast 30 (m) | Forecast 300 (m) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 21 Aug 2024 04:28:27 | 4.291 | -5.5 | 0.000 | 42.465 | 36.3 | 0.0 | 0.000 | 10.943 | 4.291 | 4.291 |
| 1 | 21 Aug 2024 04:28:33 | 4.289 | -5.5 | 0.000 | 42.453 | 36.3 | 0.0 | 0.000 | 10.972 | 4.290 | 4.290 |
| 2 | 21 Aug 2024 04:28:39 | 4.296 | -5.5 | 0.000 | 42.747 | 36.3 | 0.0 | 0.000 | 10.967 | 4.296 | 4.296 |
| 3 | 21 Aug 2024 04:28:45 | 4.298 | -5.5 | 0.000 | 42.469 | 36.3 | 0.0 | 0.000 | 10.978 | 4.299 | 4.299 |
| 4 | 21 Aug 2024 04:28:51 | 4.285 | -5.5 | 0.000 | 42.465 | 36.3 | 0.0 | 0.000 | 10.977 | 4.288 | 4.288 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 79909 | 31 Aug 2024 23:59:35 | 4.300 | -5.5 | 0.014 | 44.464 | 48.7 | 0.0 | 0.025 | 11.431 | 4.274 | 4.249 |
| 79910 | 31 Aug 2024 23:59:41 | 4.241 | -5.5 | 0.014 | 44.460 | 48.7 | 0.0 | 0.018 | 11.432 | 4.267 | 4.249 |
| 79911 | 31 Aug 2024 23:59:47 | 4.265 | -5.5 | 0.014 | 44.455 | 48.7 | 0.0 | 0.014 | 11.425 | 4.263 | 4.249 |
| 79912 | 31 Aug 2024 23:59:53 | 4.257 | -5.5 | 0.014 | 44.455 | 48.7 | 0.0 | 0.012 | 11.435 | 4.262 | 4.249 |
| 79913 | 31 Aug 2024 23:59:59 | 4.212 | -5.5 | 0.014 | 44.447 | 48.7 | 0.0 | 0.004 | 11.430 | 4.252 | 4.249 |

79914 rows × 11 columns

In [33]:
df.hvplot.line( y='Lev RAD (m)')
Out[33]:
[line plot: Lev RAD (m) time series for device 94]
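
At ~6-second sampling the raw series is heavy to plot; downsampling first keeps the figure light. A sketch, assuming the Time(UTC) column parses with the day-month-year format shown above:

In [ ]:
# Downsample the ~6 s record to 1-minute means before plotting
df['time'] = pd.to_datetime(df['Time(UTC)'], format='%d %b %Y %H:%M:%S')
df.set_index('time')['Lev RAD (m)'].resample('1min').mean().hvplot.line()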