import requests
import pandas as pd
import hvplot.pandas
import json
import io
import holoviews as hv
import geoviews as gv
import numpy as np
import geopandas as gp
import searvey
from dateutil.relativedelta import relativedelta
hv.extension('bokeh')
# Define the base URL for the API
sl_api = "https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/api/Group/"
tad_api = "https://webcritech.jrc.ec.europa.eu/TAD_server/api/Groups/Get"
api_device = "https://webcritech.jrc.ec.europa.eu/api/Device/"
# Fetch the list of providers
sl_response = requests.get(sl_api)
tad_response = requests.get(tad_api)
sl_data = json.loads(sl_response.text) # Parse the JSON response
tad_data = json.loads(tad_response.text) # Parse the JSON response
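Before looping over the providers, it helps to peek at the payloads; a quick sketch (assuming both endpoints return a JSON list of group records with a Name field, as used below):

# Quick sanity check on the payloads: both are lists of provider/group
# records, and only the 'Name' field is used in the loops below.
print(len(sl_data), "SeaLevelsDb groups, e.g.", sl_data[0]['Name'])
print(len(tad_data), "TAD server groups, e.g.", tad_data[0]['Name'])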
Data availability¶
SeaLevelDB stations¶
# Initialize a list to hold all station data
SL_data = []
# Loop through the list of providers and fetch their stations
for provider in sl_data:
provider_name = provider['Name']
stations_url = sl_api + provider_name + "/Devices"
stations_response = requests.get(stations_url)
stations_data = json.loads(stations_response.text) # Parse the JSON response
# Loop through each station and extract data
for device in stations_data:
station_data = {
'Provider': provider_name,
'Id': device['Id'],
'Name': device['Name'],
'lat': device['Lat'],
'lon': device['Lon'],
'LastAccessStatus': device['CurrentStatus']['LastAccessStatus'],
            'LastDate': device['CurrentStatus']['LastDate'],
'State': device['CurrentStatus']['State'],
'SyncStatus': device['CurrentStatus']['SyncStatus'],
'FileType': device['FileType'],
'GroupId': device['GroupId'],
'MovAvgNp': device['MovAvgNp'],
'Notes': device.get('Notes'), # Use .get() to handle missing keys
'Source': device['Source']
# Add other fields as necessary
}
SL_data.append(station_data)
SL_df = pd.DataFrame(SL_data)
SL_df
SL_df.to_csv('SeaLevelDb_stations.csv')
SL_df.groupby(['Provider','State']).count()[['Id']].hvplot.bar(
rot = 90,
logy = True,
ylim = [0.5, 2000],
grid = True
).opts(
width = 1200,
height = 800,
title = "SeaLevelDb data availability"
)
|  | Provider | Id | Name | lat | lon | LastAccessStatus | LastDate | State | SyncStatus | FileType | GroupId | MovAvgNp | Notes | Source |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | BIG | 2896 | Indonesia - Tolitoli (Central Sulawesi) | 1.050620 | 120.800049 | ERR: NO DATA TYPE | 2020-01-23T15:10:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0051TLTL02/... |
| 1 | BIG | 2897 | Indonesia - Mamuju (Western Sulawesi) | -2.666982 | 118.893349 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0009MMJU02/... |
| 2 | BIG | 2898 | Indonesia - Pantolan (Western Sulawesi) | -0.711140 | 119.856155 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0036PTLN02/... |
| 3 | BIG | 2899 | Indonesia - Kota Agung (Lampung) | -5.500444 | 104.619362 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0080KTAG01/... |
| 4 | BIG | 2900 | Indonesia - Krui (Lampung) | -5.183500 | 103.933052 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0100KRUI01/... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1313 | TD UNESCO | 2569 | Ireland - New Ballyglass pier Belmullet | 54.252998 | -9.890000 | OK | 2024-09-19T10:55:00Z | active | ready | GLOSSxml | 6 | 2 | None | http://www.ioc-sealevelmonitoring.org/service.... |
| 1314 | TD UNESCO | 2915 | Philippines - Currimao_Prs | 18.017000 | 120.483002 | OK | 2024-09-18T15:18:00Z | active | ready | GLOSSxml | 6 | 2 | None | http://www.ioc-sealevelmonitoring.org/service.... |
| 1315 | UNIFI | 1872 | Italy - Stromboli Sciara del Fuoco (1min) | 38.798401 | 15.193100 | OK | 2013-01-14T14:28:47Z | active | None | UNIFI | 21 | -1 | None | http://webcritech.jrc.ec.europa.eu/modelling/S... |
| 1316 | UNIFI | 1873 | Italy - Stromboli Sciara del Fuoco (15 sec) | 38.798401 | 15.193100 | OK | 2014-04-26T13:29:45Z | active | None | TAD_T | 21 | -1 | None | http://webcritech.jrc.ec.europa.eu/tad_server/... |
| 1317 | UNIFI | 1874 | Italy - Stromboli Sciara del Fuoco (5 sec) | 38.798401 | 15.193100 | OK | 2013-01-14T14:29:36Z | inactive | None | UNIFI | 21 | -1 | None | http://webcritech.jrc.ec.europa.eu/modelling/S... |

1318 rows × 14 columns
TAD server stations¶
TAD_data = []
# Loop through the list of providers and fetch their stations
for provider in tad_data:
provider_name = provider['Name']
stations_url = f"https://webcritech.jrc.ec.europa.eu/TAD_server/api/Groups/GetGeoJSON?group={provider_name}&maxLatency=10000000"
stations_response = requests.get(stations_url)
stations_data = json.loads(stations_response.text) # Parse the JSON response
if isinstance(stations_data, dict): stations_data = [stations_data]
# Loop through each station and extract data
if len(stations_data)>0:
for device in stations_data[0]['features']:
properties = device.get('properties', {})
geometry = device.get('geometry', {})
coordinates = geometry.get('coordinates', [None, None])
latency = properties.get('Latency', {}).get('Literal')
last_data_date_str = properties.get('LastData', {}).get('Date')
last_data_date = pd.Timestamp(last_data_date_str)
# Determine if the measurement is within the last week
state = "active" if last_data_date and (pd.Timestamp.now() - last_data_date) <= pd.Timedelta(days=7) else "inactive"
station_data = {
'Provider': properties.get('Provider'),
'Id': device.get('id'),
'Name': properties.get('Name'),
'lat': coordinates[1],
'lon': coordinates[0],
'LastAccessStatus': properties.get('LastData', {}).get('Date'),
'LastDate': last_data_date,
'SyncStatus': latency,
'State': state,
'FileType': None, # Update this if there is a corresponding field in the new data
'GroupId': None, # Update this if there is a corresponding field in the new data
'MovAvgNp': None, # Update this if there is a corresponding field in the new data
'Notes': properties.get('Notes'), # Use .get() to handle missing keys
'Source': None
# Add other fields as necessary
}
TAD_data.append(station_data)
TAD_df = pd.DataFrame(TAD_data)
TAD_df
TAD_df.to_csv('TAD_stations.csv')
TAD_df.groupby(['Provider','State']).count()[['Id']].hvplot.bar(
rot = 90,
logy = True,
ylim = [0.5, 2000],
grid = True
).opts(
width = 1600,
height = 800,
title = "SeaLevelDb data availability"
)
|  | Provider | Id | Name | lat | lon | LastAccessStatus | LastDate | SyncStatus | State | FileType | GroupId | MovAvgNp | Notes | Source |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | JRC-ISPRA | 64 | IDSL-01 | 43.878340 | 8.018800 | 30 Jun 2023 13:01:02 | 2023-06-30 13:01:02 | 446 Days | inactive | None | None | None | None | None |
| 1 | JRC-IGN | 79 | IDSL-06 | 36.542140 | -6.280612 | 19 Sep 2024 12:06:10 | 2024-09-19 12:06:10 | 16 Sec. | active | None | None | None | None | None |
| 2 | JRC-IGN | 80 | IDSL-07 | 37.567146 | -0.978958 | 19 Sep 2024 11:58:10 | 2024-09-19 11:58:10 | 8 Min. | active | None | None | None | None | None |
| 3 | JRC-NOA | 86 | IDSL-13 | 37.942535 | 22.933611 | 19 Sep 2024 12:06:20 | 2024-09-19 12:06:20 | 6 Sec. | active | None | None | None | None | None |
| 4 | JRC-KOERI | 87 | IDSL-14 | 39.835741 | 26.075859 | 06 Dec 2023 13:40:02 | 2023-12-06 13:40:02 | 287 Days | inactive | None | None | None | None | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1084 | NOAA | 313 | NOAA-Wilmington | 34.227500 | -77.953611 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1085 | NOAA | 265 | NOAA-Woods_Hole | 41.523613 | -70.671112 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1086 | NOAA | 314 | NOAA-Wrightsville_Beach | 34.213333 | -77.786667 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1087 | NOAA | 447 | NOAA-Yakutat | 59.548333 | -139.733056 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1088 | NOAA | 305 | NOAA-Yorktown_USCG_Training_Center | 37.226500 | -76.478800 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |

1089 rows × 14 columns
Reference stations already in use in Seareport:
def get_stofs2d_meta():
stofs2d = pd.read_csv(
"https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
names=["coords", "name"],
sep="!",
header=None,
skiprows=1
)
stofs2d = stofs2d.assign(
lon=stofs2d.coords.str.split("\t", n=1).str[0].astype(float),
lat=stofs2d.coords.str.strip().str.rsplit("\t", n=1).str[1].astype(float),
stofs2d_name=stofs2d.name.str.strip(),
).drop(columns=["coords", "name"])
return stofs2d
def get_ioc_meta() -> gp.GeoDataFrame:
meta_web = searvey.get_ioc_stations().drop(columns=["lon", "lat"])
meta_api = (
pd.read_json(
"http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
)
.drop_duplicates()
.drop(columns=["lon", "lat"])
.rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})
)
merged = pd.merge(
meta_web,
meta_api[["ioc_code", "lon", "lat"]].drop_duplicates(),
on=["ioc_code"],
)
return merged
def get_coops_meta() -> gp.GeoDataFrame:
coops = searvey.get_coops_stations()
coops['lon'] = coops['geometry'].x
coops['lat'] = coops['geometry'].y
coops = coops.drop(columns='geometry')
return coops
def merge_ioc_and_stofs(ioc: pd.DataFrame, stofs2d: pd.DataFrame, coops: pd.DataFrame) -> pd.DataFrame:
stations = pd.concat((ioc, stofs2d), ignore_index=True)
stations = stations.assign(unique_id=stations.ioc_code.combine_first(stations.stofs2d_name))
stations = pd.concat((stations, coops),ignore_index=True)
stations = stations.assign(unique_id=stations.unique_id.combine_first(stations.nws_id))
return stations
ioc = get_ioc_meta()
stofs2d = get_stofs2d_meta()
coops = get_coops_meta()
stations = merge_ioc_and_stofs(ioc=ioc, stofs2d=stofs2d, coops = coops.drop(columns="removed"))
stations['is_sat'] = stations.unique_id.str.contains('SA')
stations['source'] = "stofs"
stations.loc[~stations.ioc_code.isna(), 'source'] = "ioc"
stations.loc[~stations.nws_id.isna(), 'source'] = "coops"
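As a quick sanity check before plotting, tally the reference stations per source (the exact counts depend on the live catalogues at run time):

# How many reference stations come from each catalogue, and how many
# STOFS points are satellite ('SA') entries.
print(stations.source.value_counts())
print(int(stations.is_sat.sum()), "satellite ('SA') entries")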
Compare all stations
plot1 = stations.drop(columns='geometry')[~stations['is_sat']].hvplot.points(
x = 'lon',
y='lat',
c = 'source',
cmap = 'fire',
geo = True,
s = 20,
legend = False
)
plot2 = pd.concat([SL_df,TAD_df]).hvplot.points(
x = 'lon',
y='lat',
c = 'Provider',
line_color = 'k',
line_width = 0.25,
cmap = 'glasbey',
geo = True,
s=150,
tiles="OSM",
)
(plot2 * plot1).opts(
width = 1400,
height = 1070
)
Let's now compare the data availability.
SL_df['LastDate'] = SL_df.apply(lambda x:pd.Timestamp(x['LastDate']).tz_localize(None), axis = 1)
TAD_df['LastDate'] = TAD_df.apply(lambda x:pd.Timestamp(x['LastDate']).tz_localize(None), axis = 1)
SL_df.loc[SL_df['LastDate']>pd.Timestamp.now(), 'LastDate']= pd.Timestamp.now()
SL_df.loc[SL_df['LastDate']<pd.Timestamp(2010,1,1), 'LastDate']= pd.Timestamp(2010,1,1)
SL_df.LastDate.hvplot.hist(bins=100).opts(title = "SeaLevelDb")
TAD_df.LastDate.hvplot.hist().opts(title = "TAD Server")
SL_df['period_inactive'] = SL_df.apply(lambda x:(pd.Timestamp.now() - pd.Timestamp(x['LastDate']).tz_localize(None)).total_seconds()/3600/24/365.25, axis = 1)
TAD_df['period_inactive'] = TAD_df.apply(lambda x:(pd.Timestamp.now() - pd.Timestamp(x['LastDate']).tz_localize(None)).total_seconds()/3600/24/365.25, axis = 1)
all_stations = pd.concat([SL_df,TAD_df])
all_stations.period_inactive.hvplot.hist().opts(title = "years inactive")
all_stations.to_csv('TAD_SLDB_stations.csv')
plot_3 = SL_df.hvplot.points(
x = 'lon',
y='lat',
c = 'period_inactive',
s = 50,
geo = True,
cmap = 'fire_r',
tiles="OSM",
).opts(height = 550, title = 'years inactive')
((plot2 * plot1).opts(height=550, title = 'Provider') + plot_3)
Finnish (FMI) and Indonesian (BIG) stations seem to be exclusive to the WebCritech server, although they stopped recording data in January 2020.
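A quick check of that claim against the metadata (assuming the provider codes are FMI and BIG):

# Latest record per provider for the two providers mentioned above.
print(SL_df[SL_df.Provider.isin(['FMI', 'BIG'])].groupby('Provider').LastDate.max())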
plot_4 = TAD_df.hvplot.points(
x = 'lon',
y='lat',
c = 'period_inactive',
line_color = "k",
line_width = 0.25,
cmap = 'fire_r',
s = 50,
geo = True,
tiles="OSM",
).opts(title = "years inactive",height=550)
((plot2 * plot1).opts(height=550, title = 'Provider') + plot_4)
Most TAD stations seem to have stopped recording in January 2024.
The Indian (INCOIS) stations seem to be exclusive to the WebCritech server.
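The same kind of check on the TAD metadata (assuming the provider string contains INCOIS):

# Latest record for the Indian stations (provider string assumed to contain 'INCOIS').
incois = TAD_df[TAD_df.Provider.str.contains('INCOIS', na=False)]
print(len(incois), "INCOIS stations, last record:", incois.LastDate.max())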
Get detailed station metadata (NOT FINISHED)¶
We'd need to get the "First date" information, which can be retrieved by scraping the website at the URL https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/Device/1014 (the device number is the Id column). The value is contained in html.hqs-js > body > div > div.container > section.row > div#dev-last-details.col-md-5 > dl.dl-horizontal > dd of the source HTML.
from bs4 import BeautifulSoup
# A function to scrape the "First Date" from the given URL
def get_first_date(device_number):
print('.', end='')
url = f"https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/Device/{device_number}"
response = requests.get(url)
if response.status_code == 200:
soup = BeautifulSoup(response.content, 'html.parser')
# Find all dl-horizontal elements, which contain dt and dd pairs
dl_elements = soup.find_all('dl', class_='dl-horizontal')
for dl in dl_elements:
# Find all dt elements within the current dl element
dt_elements = dl.find_all('dt')
for index, dt in enumerate(dt_elements):
# Check if the dt element's text is 'First date'
if dt.get_text().strip() == 'First date':
# Get the corresponding dd element that follows the dt element
first_date_dd = dl.find_all('dd')[index]
return first_date_dd.get_text().strip()
return None
# Apply the function to each row and store the results in a new column
SL_df['FirstDate'] = SL_df['Id'].apply(get_first_date)
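Scraping ~1300 device pages back-to-back is slow and impolite to the server; a throttled variant (hypothetical wrapper, not part of the original notebook) could look like this:

import time

def get_first_date_throttled(device_number, delay=0.5):
    # Hypothetical wrapper: pause between requests to avoid hammering
    # the WebCritech server with ~1300 consecutive calls.
    time.sleep(delay)
    return get_first_date(device_number)

# SL_df['FirstDate'] = SL_df['Id'].apply(get_first_date_throttled)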
from searvey.multi import multithread
# TODO: rate-limit the requests
def fetch_SeaLevelDb_station(tmin: pd.Timestamp, tmax: pd.Timestamp, id: int, NSTEPS = 10000000):
url = f"https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/api/Device/{id}/Data?tMin={tmin.strftime('%Y-%m-%d')}&tMax={tmax.strftime('%Y-%m-%d')}&nPts={NSTEPS}&field=level&mode=CSV"
stations_response = requests.get(url)
if stations_response.ok:
        data = io.StringIO(stations_response.text)  # the request asks for mode=CSV, so parse as CSV
        df = pd.read_csv(data)
return df
else:
print(f"Error: {stations_response.status_code}")
return pd.DataFrame()
def fetch_TAD_station(tmin: pd.Timestamp, tmax: pd.Timestamp, id: int, NSTEPS = 10000000):
url = f"https://webcritech.jrc.ec.europa.eu/TAD_server/api/Data/Get/{id}?tMin={tmin.strftime('%Y-%m-%d')}&tMax={tmax.strftime('%Y-%m-%d')}&nRec={NSTEPS}&mode=CSV"
stations_response = requests.get(url)
if stations_response.ok:
data = io.StringIO(stations_response.text) # Create a text stream object
df = pd.read_csv(data) # Read the DataFrame from the text stream
return df
else:
print(f"Error: {stations_response.status_code}")
return pd.DataFrame()
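For a one-off sanity check the helpers can be called directly, for instance (device 79, IDSL-06, taken from the TAD table above):

# Fetch one month of data for a single TAD device.
sample = fetch_TAD_station(pd.Timestamp(2024, 8, 1), pd.Timestamp(2024, 9, 1), id=79)
print(sample.shape)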
def get_TAD_stations(df: pd.DataFrame, device_var: str):
    # Debug stub: inspect the first station record, then stop.
    for s, station in df.iterrows():
        print(s, station)
        break
def get_SLDB_stations(df:pd.DataFrame, device_var: str):
for s, station in df.iloc[::-1].iterrows():
print(station)
func_kwargs = []
for tstep in pd.date_range(pd.Timestamp(2020, 1, 1), station.LastDate, freq='MS'):
tmin = tstep
tmax = tstep + relativedelta(months=+1)
func_kwargs.append(
dict(
tmin=tmin,
tmax=tmax,
                    id=station[device_var],  # must match the 'id' parameter of fetch_SeaLevelDb_station
),
)
results = multithread(
func=fetch_SeaLevelDb_station,
func_kwargs=func_kwargs,
n_workers=10,
print_exceptions=False,
disable_progress_bar=False,
)
        for result in results:
            if result.result is not None:
                df = result.result
                print(df)
                return df  # debug stub: stop after the first station that returns data
get_SLDB_stations(SL_df, "Id")
Device = 94
url = f"https://webcritech.jrc.ec.europa.eu/TAD_server/api/Data/Get/{Device}?tMin=2024-08-01&tMax=2024-09-01&nRec={10000000}&mode=CSV"
stations_response = requests.get(url)
data = io.StringIO(stations_response.text) # Create a text stream object
df = pd.read_csv(data) # Read the DataFrame from the text stream
df
|  | Time(UTC) | Lev RAD (m) | Panel (V) | Rms (m) | Sensor Temp (C) | Temperature (C) | Alert | Alert Signal (m) | Battery (V) | Forecast 30 (m) | Forecast 300 (m) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 21 Aug 2024 04:28:27 | 4.291 | -5.5 | 0.000 | 42.465 | 36.3 | 0.0 | 0.000 | 10.943 | 4.291 | 4.291 |
| 1 | 21 Aug 2024 04:28:33 | 4.289 | -5.5 | 0.000 | 42.453 | 36.3 | 0.0 | 0.000 | 10.972 | 4.290 | 4.290 |
| 2 | 21 Aug 2024 04:28:39 | 4.296 | -5.5 | 0.000 | 42.747 | 36.3 | 0.0 | 0.000 | 10.967 | 4.296 | 4.296 |
| 3 | 21 Aug 2024 04:28:45 | 4.298 | -5.5 | 0.000 | 42.469 | 36.3 | 0.0 | 0.000 | 10.978 | 4.299 | 4.299 |
| 4 | 21 Aug 2024 04:28:51 | 4.285 | -5.5 | 0.000 | 42.465 | 36.3 | 0.0 | 0.000 | 10.977 | 4.288 | 4.288 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 79909 | 31 Aug 2024 23:59:35 | 4.300 | -5.5 | 0.014 | 44.464 | 48.7 | 0.0 | 0.025 | 11.431 | 4.274 | 4.249 |
| 79910 | 31 Aug 2024 23:59:41 | 4.241 | -5.5 | 0.014 | 44.460 | 48.7 | 0.0 | 0.018 | 11.432 | 4.267 | 4.249 |
| 79911 | 31 Aug 2024 23:59:47 | 4.265 | -5.5 | 0.014 | 44.455 | 48.7 | 0.0 | 0.014 | 11.425 | 4.263 | 4.249 |
| 79912 | 31 Aug 2024 23:59:53 | 4.257 | -5.5 | 0.014 | 44.455 | 48.7 | 0.0 | 0.012 | 11.435 | 4.262 | 4.249 |
| 79913 | 31 Aug 2024 23:59:59 | 4.212 | -5.5 | 0.014 | 44.447 | 48.7 | 0.0 | 0.004 | 11.430 | 4.252 | 4.249 |

79914 rows × 11 columns
df.hvplot.line( y='Lev RAD (m)')
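The 6-second series is dense; a tidy-up sketch that parses the time column (format as shown in the table above) and resamples to 10-minute means:

# Parse the time column, index the frame by it, and downsample.
df['Time(UTC)'] = pd.to_datetime(df['Time(UTC)'], format='%d %b %Y %H:%M:%S')
df.set_index('Time(UTC)')['Lev RAD (m)'].resample('10min').mean().hvplot.line()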