In [1]:
import requests
import pandas as pd
import hvplot.pandas
import json
import io
import holoviews as hv
import geoviews as gv
import numpy as np
import geopandas as gp
import searvey
from dateutil.relativedelta import relativedelta

hv.extension('bokeh')
In [2]:
# Define the base URLs for the two APIs
sl_api  = "https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/api/Group/"
tad_api = "https://webcritech.jrc.ec.europa.eu/TAD_server/api/Groups/Get"

api_device = "https://webcritech.jrc.ec.europa.eu/api/Device/"

# Fetch the list of providers
sl_response = requests.get(sl_api)
tad_response = requests.get(tad_api)
sl_data = json.loads(sl_response.text)  # Parse the JSON response
tad_data = json.loads(tad_response.text)  # Parse the JSON response
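Both endpoints return a JSON list of provider/group records; the loops below only rely on each record exposing a `Name` field. A quick sanity check (a sketch, assuming that field is present on every entry):

In [ ]:
# Peek at the provider lists (assumes each entry has a 'Name' key, as used below)
print(f"{len(sl_data)} SeaLevelDb providers, {len(tad_data)} TAD groups")
print([p['Name'] for p in sl_data[:5]])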

Data availability¶

SeaLevelDB stations¶

In [3]:
# Initialize a list to hold all station data
SL_data = []

# Loop through the list of providers and fetch their stations
for provider in sl_data:
    provider_name = provider['Name']
    stations_url = sl_api + provider_name + "/Devices"
    stations_response = requests.get(stations_url)
    stations_data = json.loads(stations_response.text)  # Parse the JSON response
    # Loop through each station and extract data
    for device in stations_data:
        station_data = {
            'Provider': provider_name,
            'Id': device['Id'],
            'Name': device['Name'],
            'lat': device['Lat'],
            'lon': device['Lon'],
            'LastAccessStatus': device['CurrentStatus']['LastAccessStatus'],
            'LastDate': device['CurrentStatus']['LastDate'],
            'State': device['CurrentStatus']['State'],
            'SyncStatus': device['CurrentStatus']['SyncStatus'],
            'FileType': device['FileType'],
            'GroupId': device['GroupId'],
            'MovAvgNp': device['MovAvgNp'],
            'Notes': device.get('Notes'),  # Use .get() to handle missing keys
            'Source': device['Source']
            # Add other fields as necessary
        }
        SL_data.append(station_data)
SL_df = pd.DataFrame(SL_data)
SL_df
SL_df.to_csv('SeaLevelDb_stations.csv')
SL_df.groupby(['Provider','State']).count()[['Id']].hvplot.bar(
    rot = 90,
    logy = True,
    ylim = [0.5, 2000], 
    grid = True
).opts(
    width = 1200,
    height = 800,
    title = "SeaLevelDb data availability"
)
Out[3]:
| | Provider | Id | Name | lat | lon | LastAccessStatus | LastDate | State | SyncStatus | FileType | GroupId | MovAvgNp | Notes | Source |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | BIG | 2896 | Indonesia - Tolitoli (Central Sulawesi) | 1.050620 | 120.800049 | ERR: NO DATA TYPE | 2020-01-23T15:10:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0051TLTL02/... |
| 1 | BIG | 2897 | Indonesia - Mamuju (Western Sulawesi) | -2.666982 | 118.893349 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0009MMJU02/... |
| 2 | BIG | 2898 | Indonesia - Pantolan (Western Sulawesi) | -0.711140 | 119.856155 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0036PTLN02/... |
| 3 | BIG | 2899 | Indonesia - Kota Agung (Lampung) | -5.500444 | 104.619362 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0080KTAG01/... |
| 4 | BIG | 2900 | Indonesia - Krui (Lampung) | -5.183500 | 103.933052 | ERR: NO DATA TYPE | 2020-01-23T15:20:00Z | active | ready | BIG | 38 | 2 | None | http://tides.big.go.id:8888/kacrut/0100KRUI01/... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1313 | TD UNESCO | 2569 | Ireland - New Ballyglass pier Belmullet | 54.252998 | -9.890000 | OK | 2024-09-19T10:55:00Z | active | ready | GLOSSxml | 6 | 2 | None | http://www.ioc-sealevelmonitoring.org/service.... |
| 1314 | TD UNESCO | 2915 | Philippines - Currimao_Prs | 18.017000 | 120.483002 | OK | 2024-09-18T15:18:00Z | active | ready | GLOSSxml | 6 | 2 | None | http://www.ioc-sealevelmonitoring.org/service.... |
| 1315 | UNIFI | 1872 | Italy - Stromboli Sciara del Fuoco (1min) | 38.798401 | 15.193100 | OK | 2013-01-14T14:28:47Z | active | None | UNIFI | 21 | -1 | None | http://webcritech.jrc.ec.europa.eu/modelling/S... |
| 1316 | UNIFI | 1873 | Italy - Stromboli Sciara del Fuoco (15 sec) | 38.798401 | 15.193100 | OK | 2014-04-26T13:29:45Z | active | None | TAD_T | 21 | -1 | None | http://webcritech.jrc.ec.europa.eu/tad_server/... |
| 1317 | UNIFI | 1874 | Italy - Stromboli Sciara del Fuoco (5 sec) | 38.798401 | 15.193100 | OK | 2013-01-14T14:29:36Z | inactive | None | UNIFI | 21 | -1 | None | http://webcritech.jrc.ec.europa.eu/modelling/S... |

1318 rows × 14 columns

Out[3]:
[bar chart: "SeaLevelDb data availability", station counts per provider split by state, log y-axis]

TAD server stations¶

In [4]:
TAD_data = []
# Loop through the list of providers and fetch their stations
for provider in tad_data:
    provider_name = provider['Name']
    stations_url = f"https://webcritech.jrc.ec.europa.eu/TAD_server/api/Groups/GetGeoJSON?group={provider_name}&maxLatency=10000000"
    stations_response = requests.get(stations_url)
    stations_data = json.loads(stations_response.text)  # Parse the JSON response
    # The endpoint may return a single dict instead of a list
    if isinstance(stations_data, dict):
        stations_data = [stations_data]
    # Loop through each station and extract data
    if len(stations_data) > 0:
        for device in stations_data[0]['features']:
            properties = device.get('properties', {})
            geometry = device.get('geometry', {})
            coordinates = geometry.get('coordinates', [None, None])
            latency = properties.get('Latency', {}).get('Literal')
            last_data_date_str = properties.get('LastData', {}).get('Date')
            last_data_date = pd.to_datetime(last_data_date_str)  # NaT when the date is missing

            # Flag the station as active if the last measurement is within the last week
            state = "active" if pd.notna(last_data_date) and (pd.Timestamp.now() - last_data_date) <= pd.Timedelta(days=7) else "inactive"

            station_data = {
                'Provider': properties.get('Provider'),
                'Id': device.get('id'),
                'Name': properties.get('Name'),
                'lat': coordinates[1],
                'lon': coordinates[0],
                'LastAccessStatus': properties.get('LastData', {}).get('Date'),
                'LastDate': last_data_date,
                'SyncStatus': latency,
                'State': state,
                'FileType': None,  # Update this if there is a corresponding field in the new data
                'GroupId': None,  # Update this if there is a corresponding field in the new data
                'MovAvgNp': None,  # Update this if there is a corresponding field in the new data
                'Notes': properties.get('Notes'),  # Use .get() to handle missing keys
                'Source': None
                # Add other fields as necessary
            }
            TAD_data.append(station_data)
TAD_df = pd.DataFrame(TAD_data)
TAD_df
TAD_df.to_csv('TAD_stations.csv')
TAD_df.groupby(['Provider','State']).count()[['Id']].hvplot.bar(
    rot = 90,
    logy = True,
    ylim = [0.5, 2000], 
    grid = True
).opts(
    width = 1600,
    height = 800,
    title = "SeaLevelDb data availability"
)
Out[4]:
| | Provider | Id | Name | lat | lon | LastAccessStatus | LastDate | SyncStatus | State | FileType | GroupId | MovAvgNp | Notes | Source |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | JRC-ISPRA | 64 | IDSL-01 | 43.878340 | 8.018800 | 30 Jun 2023 13:01:02 | 2023-06-30 13:01:02 | 446 Days | inactive | None | None | None | None | None |
| 1 | JRC-IGN | 79 | IDSL-06 | 36.542140 | -6.280612 | 19 Sep 2024 12:06:10 | 2024-09-19 12:06:10 | 16 Sec. | active | None | None | None | None | None |
| 2 | JRC-IGN | 80 | IDSL-07 | 37.567146 | -0.978958 | 19 Sep 2024 11:58:10 | 2024-09-19 11:58:10 | 8 Min. | active | None | None | None | None | None |
| 3 | JRC-NOA | 86 | IDSL-13 | 37.942535 | 22.933611 | 19 Sep 2024 12:06:20 | 2024-09-19 12:06:20 | 6 Sec. | active | None | None | None | None | None |
| 4 | JRC-KOERI | 87 | IDSL-14 | 39.835741 | 26.075859 | 06 Dec 2023 13:40:02 | 2023-12-06 13:40:02 | 287 Days | inactive | None | None | None | None | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1084 | NOAA | 313 | NOAA-Wilmington | 34.227500 | -77.953611 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1085 | NOAA | 265 | NOAA-Woods_Hole | 41.523613 | -70.671112 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1086 | NOAA | 314 | NOAA-Wrightsville_Beach | 34.213333 | -77.786667 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1087 | NOAA | 447 | NOAA-Yakutat | 59.548333 | -139.733056 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |
| 1088 | NOAA | 305 | NOAA-Yorktown_USCG_Training_Center | 37.226500 | -76.478800 | 29 Jan 2024 06:00:00 | 2024-01-29 06:00:00 | 234 Days | inactive | None | None | None | None | None |

1089 rows × 14 columns

Out[4]:
[bar chart: "TAD server data availability", station counts per provider split by state, log y-axis]

Reference stations already in use in Seareport:

In [5]:
def get_stofs2d_meta():
    stofs2d = pd.read_csv(
        "https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0",
        names=["coords", "name"],
        sep="!",
        header=None,
        skiprows=1
    )
    stofs2d = stofs2d.assign(
        lon=stofs2d.coords.str.split("\t", n=1).str[0].astype(float),
        lat=stofs2d.coords.str.strip().str.rsplit("\t", n=1).str[1].astype(float),
        stofs2d_name=stofs2d.name.str.strip(),
    ).drop(columns=["coords", "name"])
    return stofs2d


def get_ioc_meta() -> gp.GeoDataFrame:
    meta_web = searvey.get_ioc_stations().drop(columns=["lon", "lat"])
    meta_api = (
        pd.read_json(
            "http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all"
        )
        .drop_duplicates()
        .drop(columns=["lon", "lat"])
        .rename(columns={"Code": "ioc_code", "Lon": "lon", "Lat": "lat"})
    )
    merged = pd.merge(
        meta_web,
        meta_api[["ioc_code", "lon", "lat"]].drop_duplicates(),
        on=["ioc_code"],
    )
    return merged

def get_coops_meta() -> gp.GeoDataFrame: 
    coops = searvey.get_coops_stations()
    coops['lon'] = coops['geometry'].x
    coops['lat'] = coops['geometry'].y
    coops = coops.drop(columns='geometry')
    return coops

def merge_ioc_and_stofs(ioc: pd.DataFrame, stofs2d: pd.DataFrame, coops: pd.DataFrame) -> pd.DataFrame:
    stations = pd.concat((ioc, stofs2d), ignore_index=True)
    stations = stations.assign(unique_id=stations.ioc_code.combine_first(stations.stofs2d_name))
    stations = pd.concat((stations, coops),ignore_index=True)
    stations = stations.assign(unique_id=stations.unique_id.combine_first(stations.nws_id))
    return stations

ioc = get_ioc_meta()
stofs2d = get_stofs2d_meta()
coops = get_coops_meta()
stations = merge_ioc_and_stofs(ioc=ioc, stofs2d=stofs2d, coops = coops.drop(columns="removed"))
stations['is_sat'] = stations.unique_id.str.contains('SA')  # ids containing 'SA' look like satellite points
stations['source'] = "stofs"
stations.loc[~stations.ioc_code.isna(), 'source'] = "ioc"
stations.loc[~stations.nws_id.isna(), 'source'] = "coops"
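The `unique_id` falls back across the identifier columns via `combine_first`, which keeps the first non-null value per row; a minimal illustration of that semantics (values are made up):

In [ ]:
# combine_first keeps the caller's value where present, else the argument's
a = pd.Series(["abed", None, None])
b = pd.Series([None, "8411250", None])
a.combine_first(b)  # -> ['abed', '8411250', None]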

Compare all stations¶

In [6]:
plot1 = stations.drop(columns='geometry')[~stations['is_sat']].hvplot.points(
    x = 'lon',
    y='lat',
    c = 'source',
    cmap = 'fire',
    geo = True,
    s = 20,
    legend = False
)
plot2 = pd.concat([SL_df,TAD_df]).hvplot.points(
    x = 'lon',
    y='lat',
    c = 'Provider',
    line_color = 'k',
    line_width = 0.25,
    cmap = 'glasbey',
    geo = True,
    s=150,
    tiles="OSM",
)

(plot2 * plot1).opts(
    width = 1400,
    height = 1070
)
Out[6]:
[map: WebCritech (SeaLevelDb + TAD) stations colored by provider, overlaid with Seareport reference stations colored by source]

Let's now compare this with data availability.

In [7]:
SL_df['LastDate'] = pd.to_datetime(SL_df['LastDate'], utc=True).dt.tz_localize(None)
TAD_df['LastDate'] = pd.to_datetime(TAD_df['LastDate'], utc=True).dt.tz_localize(None)
SL_df.loc[SL_df['LastDate']>pd.Timestamp.now(), 'LastDate']= pd.Timestamp.now()
SL_df.loc[SL_df['LastDate']<pd.Timestamp(2010,1,1), 'LastDate']= pd.Timestamp(2010,1,1)
SL_df.LastDate.hvplot.hist(bins=100).opts(title = "SeaLevelDb")
TAD_df.LastDate.hvplot.hist().opts(title = "TAD Server")
Out[7]:
[histogram: "SeaLevelDb", distribution of LastDate]
Out[7]:
[histogram: "TAD Server", distribution of LastDate]
In [8]:
SL_df['period_inactive'] = (pd.Timestamp.now() - SL_df['LastDate']).dt.total_seconds() / 3600 / 24 / 365.25  # in years
TAD_df['period_inactive'] = (pd.Timestamp.now() - TAD_df['LastDate']).dt.total_seconds() / 3600 / 24 / 365.25  # in years

all_stations = pd.concat([SL_df,TAD_df])
all_stations.period_inactive.hvplot.hist().opts(title = "years inactive")
Out[8]:
[histogram: "years inactive", distribution across all stations]
In [9]:
all_stations.to_csv('TAD_SLDB_stations.csv')
In [10]:
plot_3 = SL_df.hvplot.points(
    x = 'lon',
    y='lat',
    c = 'period_inactive',
    s = 50,
    geo = True,
    cmap = 'fire_r',
    tiles="OSM",
).opts(height = 550, title = 'years inactive')

((plot2 * plot1).opts(height=550, title = 'Provider') + plot_3)
Out[10]:
[maps: provider overlay (left) and SeaLevelDb years inactive (right)]

Finnish (FMI) and Indonesian (BIG) stations appear to be exclusive to the WebCritech server, although they stopped recording data in January 2020.
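
A rough way to check this exclusivity is to look for a Seareport reference station near each WebCritech station. A coarse sketch, assuming the providers appear as "FMI" and "BIG" in the Provider column and using an arbitrary ~0.1° threshold on raw coordinates (not a proper geodesic match):

In [ ]:
# Count FMI/BIG stations that have a reference station within ~0.1 degrees
for prov in ["FMI", "BIG"]:
    sub = SL_df[SL_df.Provider == prov]
    matched = sum(
        bool((np.hypot(stations.lon - st.lon, stations.lat - st.lat) < 0.1).any())
        for _, st in sub.iterrows()
    )
    print(f"{prov}: {matched}/{len(sub)} stations near a reference station")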

In [11]:
plot_4 = TAD_df.hvplot.points(
    x = 'lon',
    y='lat',
    c = 'period_inactive',
    line_color = "k",
    line_width = 0.25,
    cmap = 'fire_r',
    s = 50,
    geo = True,
    tiles="OSM",
).opts(title = "years inactive",height=550)

((plot2 * plot1).opts(height=550, title = 'Provider') + plot_4)
Out[11]:
[maps: provider overlay (left) and TAD years inactive (right)]

Most TAD stations seem to have stopped recording in January 2024.

Indian (INCOIS) stations appear to be exclusive to the WebCritech server.
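
The January 2024 cutoff can be checked by tallying stations per month of last record (a sketch over the `LastDate` column built above):

In [ ]:
# Month of the last record per TAD station; a spike at 2024-01 backs the claim
pd.to_datetime(TAD_df.LastDate).dt.to_period('M').value_counts().sort_index().tail(12)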

Get detailed station metadata (NOT FINISHED)¶

We'd need to get the "First date" information, which can be retrieved by scraping the website: the device page at https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/Device/1014 (the device number is the Id column) contains it under html.hqs-js > body > div > div.container > section.row > div#dev-last-details.col-md-5 > dl.dl-horizontal > dd in the source HTML.

In [ ]:
from bs4 import BeautifulSoup

# Scrape the "First date" value from a device's detail page
def get_first_date(device_number):
    print('.', end='')  # crude progress indicator
    url = f"https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/Device/{device_number}"
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        # Each dl-horizontal element holds dt/dd pairs of metadata
        dl_elements = soup.find_all('dl', class_='dl-horizontal')
        for dl in dl_elements:
            dt_elements = dl.find_all('dt')
            for index, dt in enumerate(dt_elements):
                if dt.get_text().strip() == 'First date':
                    # The dd at the same index holds the corresponding value
                    first_date_dd = dl.find_all('dd')[index]
                    return first_date_dd.get_text().strip()
    return None

# Apply the function to each row and store the results in a new column
SL_df['FirstDate'] = SL_df['Id'].apply(get_first_date)
In [ ]:
from searvey.multi import multithread

# NOTE: no rate limiting is applied to these requests
def fetch_SeaLevelDb_station(tmin: pd.Timestamp, tmax: pd.Timestamp, id: int, NSTEPS=10000000):
    url = f"https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/api/Device/{id}/Data?tMin={tmin.strftime('%Y-%m-%d')}&tMax={tmax.strftime('%Y-%m-%d')}&nPts={NSTEPS}&field=level&mode=CSV"
    stations_response = requests.get(url)
    if stations_response.ok:
        data = io.StringIO(stations_response.text)  # mode=CSV returns CSV, so parse it as such
        df = pd.read_csv(data)
        return df
    else:
        print(f"Error: {stations_response.status_code}")
        return pd.DataFrame()


def fetch_TAD_station(tmin: pd.Timestamp, tmax: pd.Timestamp, id: int, NSTEPS = 10000000): 
    url = f"https://webcritech.jrc.ec.europa.eu/TAD_server/api/Data/Get/{id}?tMin={tmin.strftime('%Y-%m-%d')}&tMax={tmax.strftime('%Y-%m-%d')}&nRec={NSTEPS}&mode=CSV"
    stations_response = requests.get(url)
    if stations_response.ok:
        data = io.StringIO(stations_response.text)  # Create a text stream object
        df = pd.read_csv(data)  # Read the DataFrame from the text stream
        return df
    else:
        print(f"Error: {stations_response.status_code}")
        return pd.DataFrame()


def get_TAD_stations(df: pd.DataFrame, device_var: str):
    # NOT FINISHED: currently just inspects the first station
    for s, station in df.iterrows():
        print(s, station)
        break

def get_SLDB_stations(df: pd.DataFrame, device_var: str):
    for s, station in df.iloc[::-1].iterrows():
        print(station)
        # Build one request per month from 2020-01-01 up to the station's last record
        func_kwargs = []
        for tstep in pd.date_range(pd.Timestamp(2020, 1, 1), station.LastDate, freq='MS'):
            tmin = tstep
            tmax = tstep + relativedelta(months=+1)
            func_kwargs.append(
                dict(
                    tmin=tmin,
                    tmax=tmax,
                    id=station[device_var],
                ),
            )
        results = multithread(
            func=fetch_SeaLevelDb_station,
            func_kwargs=func_kwargs,
            n_workers=10,
            print_exceptions=False,
            disable_progress_bar=False,
        )
        for result in results:
            if result.result is not None:
                df = result.result
                print(df)
        break  # NOT FINISHED: stop after the first station
get_SLDB_stations(SL_df, "Id")
In [27]:
Device = 94
url = f"https://webcritech.jrc.ec.europa.eu/TAD_server/api/Data/Get/{Device}?tMin=2024-08-01&tMax=2024-09-01&nRec={10000000}&mode=CSV"
In [28]:
stations_response = requests.get(url)
data = io.StringIO(stations_response.text)  # Create a text stream object
df = pd.read_csv(data)  # Read the DataFrame from the text stream
df
Out[28]:
| | Time(UTC) | Lev RAD (m) | Panel (V) | Rms (m) | Sensor Temp (C) | Temperature (C) | Alert | Alert Signal (m) | Battery (V) | Forecast 30 (m) | Forecast 300 (m) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 21 Aug 2024 04:28:27 | 4.291 | -5.5 | 0.000 | 42.465 | 36.3 | 0.0 | 0.000 | 10.943 | 4.291 | 4.291 |
| 1 | 21 Aug 2024 04:28:33 | 4.289 | -5.5 | 0.000 | 42.453 | 36.3 | 0.0 | 0.000 | 10.972 | 4.290 | 4.290 |
| 2 | 21 Aug 2024 04:28:39 | 4.296 | -5.5 | 0.000 | 42.747 | 36.3 | 0.0 | 0.000 | 10.967 | 4.296 | 4.296 |
| 3 | 21 Aug 2024 04:28:45 | 4.298 | -5.5 | 0.000 | 42.469 | 36.3 | 0.0 | 0.000 | 10.978 | 4.299 | 4.299 |
| 4 | 21 Aug 2024 04:28:51 | 4.285 | -5.5 | 0.000 | 42.465 | 36.3 | 0.0 | 0.000 | 10.977 | 4.288 | 4.288 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 79909 | 31 Aug 2024 23:59:35 | 4.300 | -5.5 | 0.014 | 44.464 | 48.7 | 0.0 | 0.025 | 11.431 | 4.274 | 4.249 |
| 79910 | 31 Aug 2024 23:59:41 | 4.241 | -5.5 | 0.014 | 44.460 | 48.7 | 0.0 | 0.018 | 11.432 | 4.267 | 4.249 |
| 79911 | 31 Aug 2024 23:59:47 | 4.265 | -5.5 | 0.014 | 44.455 | 48.7 | 0.0 | 0.014 | 11.425 | 4.263 | 4.249 |
| 79912 | 31 Aug 2024 23:59:53 | 4.257 | -5.5 | 0.014 | 44.455 | 48.7 | 0.0 | 0.012 | 11.435 | 4.262 | 4.249 |
| 79913 | 31 Aug 2024 23:59:59 | 4.212 | -5.5 | 0.014 | 44.447 | 48.7 | 0.0 | 0.004 | 11.430 | 4.252 | 4.249 |

79914 rows × 11 columns

In [33]:
df.hvplot.line( y='Lev RAD (m)')
Out[33]:
[line plot: Lev RAD (m) time series for device 94]
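
At ~6-second sampling the raw series is heavy to plot; downsampling first keeps the figure light. A sketch, assuming the Time(UTC) column parses with the day-month-year format shown above:

In [ ]:
# Downsample the ~6 s record to 1-minute means before plotting
df['time'] = pd.to_datetime(df['Time(UTC)'], format='%d %b %Y %H:%M:%S')
df.set_index('time')['Lev RAD (m)'].resample('1min').mean().hvplot.line()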