first part: processing¶
import os
import numpy as np
import pandas as pd
import xarray as xr
import holoviews as hv
import panel as pn
import param
import geopandas as gp
import shapely
import json
from holoviews import opts
import geoviews as gv
import hvplot.pandas
from bokeh.models import HoverTool
from holoviews import streams
from holoviews.plotting.links import RangeToolLink
from holoviews.operation import histogram
from holoviews.operation.datashader import rasterize, spread
import bokeh.palettes as bp
from scipy.stats import linregress
from scipy.spatial import cKDTree
from typing import Tuple, Dict
hv.extension('bokeh')
model info¶
we need to specify the different versions of the global surge model:
- v0
- v0.2
- v1.2
- v2.0
- v2.2
more info in the seareport_meshes repository
versions = {
'v0': 'results/2D/v0.nc',
'v0.2': 'results/2D/v0.2.nc',
# 'v1.2': 'results/2D/v0.2.nc', # for now we use v0.2 results
# 'v2.0': 'results/2D/v0.2.nc', # for now we use v0.2 results
# 'v2.1': 'results/2D/v0.2.nc', # for now we use v0.2 results
# 'v2.2': 'results/2D/v0.2.nc', # for now we use v0.2 results
}
# time for the comparison
tmin = '2023-01-01'
tmax = '2023-12-31'
observations info¶
we need to confront the model with the observations.
Here we want to compare modeled surge, so the data had to be cleaned up and detided:
- the data has been extracted from the IOC api, using searvey
- the clean-up has been done with ioc_cleanup; we selected ~180 candidate stations for the 2022/2023 period, depending on their data quality
- the detiding has been done with Utide, through analysea's detide function
the example below is for one station: acnj (Atlantic City, New Jersey)
raw = pd.read_parquet('obs/raw/acnj.parquet')
clean = pd.read_parquet('obs/clean/acnj.parquet')
from analysea.tide import detide
surge = detide(clean[clean.columns[0]], lat=39.355) #lat for atlantic city
solve: matrix prep ... solution ... done.
ts_view = dict(width = 1200, height = 800)
scatter_view = dict(width = 800, height = 800)
mod_opts_raster = dict(cmap=["blue"])
mod_opts = dict(color="blue")
obs_opts_raster = dict(cmap=["red"])
obs_opts = dict(color="red")
example for a month:
tmin_ = pd.Timestamp(2022,7,1)
tmax_ = pd.Timestamp(2022,8,1)
#
raw_ = raw[tmin_:tmax_].hvplot(label='raw')
clean_ = clean[tmin_:tmax_].hvplot(label='clean')
surge_ = surge[tmin_:tmax_].hvplot(label='surge')
#
(raw_ * clean_ * surge_).opts(show_legend=True, **ts_view)
seaset = pd.read_csv('https://raw.githubusercontent.com/tomsail/seaset/main/Notebooks/catalog_full.csv', index_col=0)
# seaset needs to be corrected because it does not contain all station names
import glob
list_clean = glob.glob('*.parquet', root_dir = "obs/clean/")
ioc_cleanup_list = [item.split('.')[0] for item in list_clean]
surge_stations = seaset[seaset.ioc_code.isin(ioc_cleanup_list)]
for ii,ioc_code in enumerate(surge_stations.ioc_code):
lat = surge_stations.iloc[ii].latitude
if not os.path.exists(f"obs/surge/{ioc_code}.parquet"):
df = pd.read_parquet(f"obs/clean/{ioc_code}.parquet")
surge = detide(df[df.columns[0]], lat=lat)
surge.to_frame().to_parquet(f"obs/surge/{ioc_code}.parquet")
def closest_n_points(nodes, N, meshXY, dist_max=np.inf):
mytree = cKDTree(meshXY)
d_, indice = mytree.query(nodes, range(1, N + 1))
indice[d_ > dist_max] = -1
mask = indice != -1
return indice[mask].T, d_[mask].T
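a minimal usage sketch (coordinates made up for illustration): for each query point, closest_n_points returns the index of the nearest mesh node and the corresponding distance in degrees:
meshXY = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])  # hypothetical mesh nodes (lon, lat)
stations = np.array([[0.1, 0.2], [0.9, 0.8]])                        # hypothetical station positions
idx, dist = closest_n_points(stations, 1, meshXY)
# idx -> nearest node index per station (here [0, 3]), dist -> Euclidean distance in degrees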
def extract_t_elev_2D(
ds: xr.Dataset,
x: float,
y: float,
xstr: str = 'longitude',
ystr: str = 'latitude'
)-> Tuple[pd.Series, float, float, float]:
lons, lats = ds[xstr].values, ds[ystr].values
indx, dist_ = closest_n_points(np.array([x, y]).T, 1, np.array([lons,lats]).T)
ds_ = ds.isel(node=indx[0])
elev_ = ds_.elev.values
t_ = [pd.Timestamp(ti) for ti in ds_.time.values]
return pd.Series(elev_, index=t_), np.round(dist_, 2), float(ds_[xstr]), float(ds_[ystr])
def get_obs(folder : str, ioc_code: str, ext: str = ".parquet")->pd.Series:
obs = pd.read_parquet(f"{folder}/{ioc_code}{ext}")
# hack: keep only the first column, as a Series
obs = obs[obs.columns[0]]
return obs
def get_model(model_file: str, ioc_code: str, catalog: pd.DataFrame)->Tuple[pd.Series, float, float]:
# Extract model data and calculate correlation
ds = xr.open_dataset(model_file)
s = catalog[catalog.ioc_code == ioc_code]
mod, d_, mlon, mlat = extract_t_elev_2D(ds, s.longitude.values[0], s.latitude.values[0], 'lon', 'lat')
return mod, mlon, mlat
Stats functions¶
Suppose we compare two time series:
- ts1: the modeled surge time series
- ts2: the observed surge time series
We need metrics to assess the quality of the model. We define the most important ones, as stated on this Stats wiki:
A. Dimensional Statistics:¶
1. Mean Error (or Bias)¶
$$\langle x_c - x_m \rangle = \langle x_c \rangle - \langle x_m \rangle$$
2. RMSE (Root Mean Squared Error)¶
$$\sqrt{\langle(x_c - x_m)^2\rangle}$$
3. Mean-Absolute Error (MAE):¶
$$\langle |x_c - x_m| \rangle$$
B. Dimensionless Statistics:¶
1. Performance Scores (PS) or Nash-Sutcliffe Coefficient (NSE): $$1 - \frac{\langle (x_c - x_m)^2 \rangle}{\langle (x_m - x_R)^2 \rangle}$$¶
- Range Qualification:
- 0.8<PS<1.0 : Excellent
- 0.6<PS<0.8 : Good
- 0.3<PS<0.6 : Reasonable
- 0<PS<0.3 : Poor
- PS<0 : Bad
2. Correlation Coefficient (R), values closer to 1 indicate better agreement:¶
$$\frac {\langle x_{m}x_{c}\rangle -\langle x_{m}\rangle \langle x_{c}\rangle }{{\sqrt {\langle x_{m}^{2}\rangle -\langle x_{m}\rangle ^{2}}}{\sqrt {\langle x_{c}^{2}\rangle -\langle x_{c}\rangle ^{2}}}}$$
3. Kling–Gupta Efficiency (KGE), values closer to 1 indicate better agreement:¶
$$1 - \sqrt{(r-1)^2 + b^2 + (g-1)^2}$$
- with:
- r the correlation
- b the modified bias term (see ref): $\frac{\langle x_c \rangle - \langle x_m \rangle}{\sigma_m}$
- g the std dev term: $\frac{\sigma_c}{\sigma_m}$
4. Lambda index ($\lambda$), values closer to 1 indicate better agreement:¶
$$\lambda = 1 - \frac{\sum{(x_c - x_m)^2}}{\sum{(x_m - \overline{x}_m)^2} + \sum{(x_c - \overline{x}_c)^2} + n(\overline{x}_m - \overline{x}_c)^2 + \kappa}$$
- with $\kappa = 2 \cdot \left| \sum{((x_m - \overline{x}_m) \cdot (x_c - \overline{x}_c))} \right|$, applied only when the correlation is negative ($\kappa = 0$ otherwise)
def get_bias(df1: pd.Series, df2: pd.Series, round:int = 3)->float:
bias = df1.mean() - df2.mean()
return np.round(bias, round)
def get_mse(df1: pd.Series, df2: pd.Series, round: int = 3)->float:
mse = np.square(np.subtract(df2, df1)).mean()
return np.round(mse, round)
def get_rmse(df1: pd.Series, df2: pd.Series, round:int = 3)->float:
rmse = np.sqrt(get_mse(df1, df2, 10))
return np.round(rmse, round)
def get_mae(df1: pd.Series, df2: pd.Series, round:int = 3)->float:
mae = np.abs(np.subtract(df2, df1)).mean()
return np.round(mae, round)
def get_mad(df1: pd.Series, df2: pd.Series, round:int = 3)->float:
    mad = np.abs(np.subtract(df2, df1)).std()
    return np.round(mad, round)
def get_madp(df1: pd.Series, df2: pd.Series, round:int = 3)->float:
pc1, pc2 = get_percentiles(df1, df2)
return get_mad(pc1, pc2, round)
def get_madc(df1: pd.Series, df2: pd.Series, round:int = 3)->float:
madp = get_madp(df1, df2, round)
return get_mad(df1, df2, round) + madp
def get_rms(df1: pd.Series, df2: pd.Series, round:int = 3)->float:
crmsd = ((df1 - df1.mean()) - (df2 - df2.mean()))**2
return np.round(np.sqrt(crmsd.mean()), round)
def get_corr(df1: pd.Series, df2: pd.Series, round: int = 3)->float:
corr = df1.corr(df2)
return np.round(corr, round)
def get_nse(df1: pd.Series, df2: pd.Series, round: int = 3)->float:
nse = 1 - np.nansum(np.subtract(df2, df1) ** 2) / np.nansum((df2 - np.nanmean(df2)) ** 2)
return np.round(nse, round)
def get_lambda(df1: pd.Series, df2: pd.Series, round: int = 3)->float:
Xmean = np.nanmean(df2)
Ymean = np.nanmean(df1)
nObs = len(df2)
corr = get_corr(df1, df2, 10)
if corr >= 0:
kappa = 0
else:
kappa = 2 * abs(np.nansum((df2 - Xmean) * (df1 - Ymean)))
Nomin = np.nansum((df2 - df1) ** 2)
Denom = (
np.nansum((df2 - Xmean) ** 2)
+ np.nansum((df1 - Ymean) ** 2)
+ nObs * ((Xmean - Ymean) ** 2)
+ kappa
)
lambda_index = 1 - Nomin / Denom
return np.round(lambda_index, round)
def get_kge(df1: pd.Series, df2: pd.Series, round: int =3)->float:
corr = get_corr(df1, df2, 10)
b = (df1.mean() - df2.mean())/df2.std()
g = df1.std()/df2.std()
kge = 1 - np.sqrt((corr-1)**2 + b**2 + (g-1)**2)
return np.round(kge, round)
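as a quick sanity check (synthetic data, purely illustrative), a near-perfect model should score bias ≈ 0, rmse ≈ 0, and cr, kge, lambda ≈ 1:
rng = np.random.default_rng(0)
t = pd.date_range("2023-01-01", periods=1000, freq="h")
obs_test = pd.Series(rng.normal(0, 0.3, 1000), index=t)   # synthetic "observed" surge
mod_test = obs_test + rng.normal(0, 0.05, 1000)           # synthetic "model": obs + small noise
print(get_bias(mod_test, obs_test), get_rmse(mod_test, obs_test),
      get_corr(mod_test, obs_test), get_kge(mod_test, obs_test),
      get_lambda(mod_test, obs_test))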
def align_ts(df1: pd.Series, df2: pd.Series)->Tuple[pd.Series, pd.Series]:
ts1, ts2 = df1.align(df2, axis = 0)
ts1 = ts1.interpolate()
nan_mask1 = pd.isna(ts1)
nan_mask2 = pd.isna(ts2)
nan_mask = np.logical_or(nan_mask1.values, nan_mask2.values)
ts1 = ts1[~nan_mask]
ts2 = ts2[~nan_mask]
return ts1, ts2
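a small hedged example of what align_ts does: the model series is interpolated onto the union of timestamps, then both series are masked wherever either one is missing, which effectively resamples the model onto the observation timestamps:
m = pd.Series([0.0, 1.0], index=pd.to_datetime(["2023-01-01 00:00", "2023-01-01 01:00"]))  # hypothetical hourly model
o = pd.Series([0.1, 0.4, 0.9], index=pd.to_datetime(["2023-01-01 00:00", "2023-01-01 00:30", "2023-01-01 01:00"]))  # hypothetical 30-min obs
m_, o_ = align_ts(m, o)
# m_ now also has a value at 00:30 (interpolated to 0.5), on the same index as o_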
def get_percentiles(df1: pd.Series, df2: pd.Series, higher_tail:bool = False) -> Tuple[pd.Series, pd.Series]:
x = np.arange(0, 0.99, 0.01)
if higher_tail:
x = np.hstack([x, np.arange(0.99, 1, 0.001)])
pc1 = np.zeros(len(x))
pc2 = np.zeros(len(x))
for it, thd in enumerate(x):
pc1[it] = df1.quantile(thd)
pc2[it] = df2.quantile(thd)
return pd.Series(pc1), pd.Series(pc2)
def get_stats(ts1: pd.Series, ts2: pd.Series)->Dict[str, float]:
"""
it is STRONGLY advised to use :
* model data for ts1
* observed data for ts2
"""
version_stat = {
"bias": get_bias(ts1, ts2),
"rmse": get_rmse(ts1, ts2),
"rms": get_rms(ts1, ts2),
"mean_df1": np.round(ts1.mean(), 3),
"mean_df2": np.round(ts2.mean(), 3),
"std_df1": np.round(ts1.std(), 3),
"std_df2": np.round(ts2.std(), 3),
"nse": get_nse(ts1, ts2),
"lamba": get_lambda(ts1, ts2),
"cr": get_corr(ts1, ts2),
"slope": linregress(ts1, ts2).slope,
"intercept": linregress(ts1, ts2).intercept,
"mad" : get_mad(ts1, ts2),
"madp" : get_madp(ts1, ts2),
"madc" : get_madc(ts1, ts2),
"kge": get_kge(ts1, ts2)
}
return version_stat
def json_format(d):
for key, value in d.items():
if isinstance(value, (dict, list, tuple)):
json_format(value) # Recurse into nested dictionaries
elif isinstance(value, np.ndarray):
d[key] = value.tolist() # Convert NumPy array to list
elif isinstance(value, pd.Timestamp):
d[key] = value.strftime("%Y-%m-%d %H:%M:%S") # Convert pandas Timestamp to string
elif isinstance(value, pd.Timedelta):
d[key] = str(value) # Convert pandas Timedelta to string
else:
d[key] = str(value)
return d
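for illustration, json_format stringifies anything json.dump cannot serialise natively (the dict below is made up):
d = {"ts": pd.Timestamp("2023-01-15 15:00"), "arr": np.array([1.0, 2.0]), "val": np.float64(0.5)}
json.dumps(json_format(d))
# -> '{"ts": "2023-01-15 15:00:00", "arr": [1.0, 2.0], "val": "0.5"}'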
Model vs observations¶
ts_folder = 'obs/surge'
model_file = 'results/2D/v0.nc'
1 - Example for horn¶
ioc_code = 'horn'
obs = get_obs(ts_folder, ioc_code, '.parquet')
mod, slon, slat = get_model(model_file, ioc_code, surge_stations)
mod_, obs_ = align_ts(mod, obs)
stats_ = get_stats(mod_, obs_)
general metrics
stats_
{'bias': 0.062, 'rmse': 0.197, 'rms': 0.187, 'mean_df1': 0.072, 'mean_df2': 0.01, 'std_df1': 0.349, 'std_df2': 0.315, 'nse': 0.61, 'lamba': 0.828, 'cr': 0.847, 'slope': 0.7633102349164613, 'intercept': -0.044838491335618534, 'mad': 0.132, 'madp': 0.14, 'madc': 0.272, 'kge': 0.728}
storm detection¶
we select the peaks above the 99th quantile of the modeled TS and compare them with the peaks above the 99th quantile of the observed TS
from pyextremes import get_extremes
threshold = mod_.quantile(0.99)
print(f"threshold is: {np.round(threshold, 3)}")
threshold is: 1.274
ext_ = get_extremes(mod_, "POT", threshold=threshold, r="72H")
modeled_extremes = pd.DataFrame({"modeled" : ext_, "time_model" : ext_.index}, index=ext_.index)
modeled_extremes
| date-time | modeled | time_model |
| --- | --- | --- |
| 2023-01-15 15:00:00 | 1.709692 | 2023-01-15 15:00:00 |
| 2023-02-01 19:00:00 | 1.700684 | 2023-02-01 19:00:00 |
| 2023-02-17 17:00:00 | 1.499066 | 2023-02-17 17:00:00 |
| 2023-03-13 18:00:00 | 1.389214 | 2023-03-13 18:00:00 |
| 2023-09-19 18:00:00 | 1.335437 | 2023-09-19 18:00:00 |
| 2023-10-14 16:00:00 | 1.694628 | 2023-10-14 16:00:00 |
obs_ = obs_[tmin:tmax]
threshold = min(1, obs_.quantile(0.99))
print(f"threshold is: {np.round(threshold, 3)}")
ext_ = get_extremes(obs_, "POT", threshold=threshold, r="72H")
observed_extremes = pd.DataFrame({"observed" : ext_, "time_obs" : ext_.index}, index=ext_.index)
observed_extremes
threshold is: 0.938
| date-time | observed | time_obs |
| --- | --- | --- |
| 2023-01-15 12:49:00 | 1.853305 | 2023-01-15 12:49:00 |
| 2023-01-30 13:27:00 | 1.240421 | 2023-01-30 13:27:00 |
| 2023-02-17 15:47:00 | 1.061365 | 2023-02-17 15:47:00 |
| 2023-03-07 08:00:00 | 1.144401 | 2023-03-07 08:00:00 |
| 2023-03-13 19:17:00 | 1.068405 | 2023-03-13 19:17:00 |
| 2023-09-19 18:24:00 | 1.224318 | 2023-09-19 18:24:00 |
| 2023-10-14 18:33:00 | 1.468797 | 2023-10-14 18:33:00 |
| 2023-10-30 07:56:00 | 0.993046 | 2023-10-30 07:56:00 |
we can plot the time series and the extremes detected
def plot_extreme_raster(ts:pd.Series, quantile: float, duration_cluster: int = 72, color = 'black', label = ""):
"""
this function might induce overhead if the time series is too long
"""
ext = get_extremes(ts, "POT", threshold=ts.quantile(quantile), r=f"{duration_cluster}H")
ts_ = rasterize(hv.Curve(ts, label=label),line_width = 0.5).opts(cmap=[color], **ts_view, show_grid=True, alpha = 0.7,)
sc_ = hv.Scatter(ext,label=label).opts(opts.Scatter(line_color="black", fill_color=color, size=8))
th_ = hv.HLine(ts.quantile(quantile)).opts(opts.HLine(color=color, line_dash="dashed"))
th_text_ = hv.Text(ts.index[int(len(ts)/2)],ts.quantile(quantile), f"{ts.quantile(quantile):.2f}")
return ts_ * sc_ * th_ * th_text_
def plot_extreme(ts:pd.Series, quantile: float, duration_cluster: int = 72, color = 'k', label = ""):
ext = get_extremes(ts, "POT", threshold=ts.quantile(quantile), r=f"{duration_cluster}H")
ts_ = hv.Curve(ts,label=label).opts(color=color, **ts_view, show_grid=True, alpha = 0.7,)
sc_ = hv.Scatter(ext,label=label).opts(opts.Scatter(line_color="black", fill_color=color, size=8))
th_ = hv.HLine(ts.quantile(quantile), label=label).opts(opts.HLine(color=color, line_dash="dashed"))
th_text_ = hv.Text(ts.index[int(len(ts)/2)],ts.quantile(quantile), f"{ts.quantile(quantile):.2f}")
return ts_ * sc_ * th_ * th_text_
mod_plot = plot_extreme_raster(mod_, 0.9, color = 'blue', label = "model")
obs_plot = plot_extreme_raster(obs_, 0.9, color = 'red', label = "observed")
mod_plot * obs_plot
extremes = pd.concat([modeled_extremes, observed_extremes], axis=1)
extremes = extremes.groupby(pd.Grouper(freq='3D')).mean().dropna(how='all')
extremes
| date-time | modeled | time_model | observed | time_obs |
| --- | --- | --- | --- | --- |
| 2023-01-15 | 1.709692 | 2023-01-15 15:00:00 | 1.853305 | 2023-01-15 12:49:00 |
| 2023-01-30 | 1.700684 | 2023-02-01 19:00:00 | 1.240421 | 2023-01-30 13:27:00 |
| 2023-02-17 | 1.499066 | 2023-02-17 17:00:00 | 1.061365 | 2023-02-17 15:47:00 |
| 2023-03-07 | NaN | NaT | 1.144401 | 2023-03-07 08:00:00 |
| 2023-03-13 | 1.389214 | 2023-03-13 18:00:00 | 1.068405 | 2023-03-13 19:17:00 |
| 2023-09-18 | 1.335437 | 2023-09-19 18:00:00 | 1.224318 | 2023-09-19 18:24:00 |
| 2023-10-12 | 1.694628 | 2023-10-14 16:00:00 | 1.468797 | 2023-10-14 18:33:00 |
| 2023-10-30 | NaN | NaT | 0.993046 | 2023-10-30 07:56:00 |
extremes_match = extremes.groupby(pd.Grouper(freq='3D')).mean().dropna()
extremes_match
| date-time | modeled | time_model | observed | time_obs |
| --- | --- | --- | --- | --- |
| 2023-01-15 | 1.709692 | 2023-01-15 15:00:00 | 1.853305 | 2023-01-15 12:49:00 |
| 2023-01-30 | 1.700684 | 2023-02-01 19:00:00 | 1.240421 | 2023-01-30 13:27:00 |
| 2023-02-17 | 1.499066 | 2023-02-17 17:00:00 | 1.061365 | 2023-02-17 15:47:00 |
| 2023-03-13 | 1.389214 | 2023-03-13 18:00:00 | 1.068405 | 2023-03-13 19:17:00 |
| 2023-09-18 | 1.335437 | 2023-09-19 18:00:00 | 1.224318 | 2023-09-19 18:24:00 |
| 2023-10-12 | 1.694628 | 2023-10-14 16:00:00 | 1.468797 | 2023-10-14 18:33:00 |
print(f"database match: {len(extremes_match)/len(extremes)*100}%, over {len(extremes)} total storms")
database match: 75.0%, over 8 total storms
let's build useful metrics
extremes_match['difference'] = extremes_match['modeled'] - extremes_match['observed']
extremes_match['norm_diff'] = extremes_match['difference']/extremes_match['observed']
extremes_match['error'] = extremes_match["norm_diff"].abs()
# extremes_match['mean'] = (extremes_match['observed'] + extremes_match['modeled'])/2
extremes_match
| date-time | modeled | time_model | observed | time_obs | difference | norm_diff | error |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 2023-01-15 | 1.709692 | 2023-01-15 15:00:00 | 1.853305 | 2023-01-15 12:49:00 | -0.143614 | -0.077491 | 0.077491 |
| 2023-01-30 | 1.700684 | 2023-02-01 19:00:00 | 1.240421 | 2023-01-30 13:27:00 | 0.460264 | 0.371055 | 0.371055 |
| 2023-02-17 | 1.499066 | 2023-02-17 17:00:00 | 1.061365 | 2023-02-17 15:47:00 | 0.437700 | 0.412393 | 0.412393 |
| 2023-03-13 | 1.389214 | 2023-03-13 18:00:00 | 1.068405 | 2023-03-13 19:17:00 | 0.320809 | 0.300270 | 0.300270 |
| 2023-09-18 | 1.335437 | 2023-09-19 18:00:00 | 1.224318 | 2023-09-19 18:24:00 | 0.111119 | 0.090760 | 0.090760 |
| 2023-10-12 | 1.694628 | 2023-10-14 16:00:00 | 1.468797 | 2023-10-14 18:33:00 | 0.225831 | 0.153752 | 0.153752 |
# R1: normalised error for the biggest observed storm
idx_max = extremes_match['observed'].idxmax()
R1 = extremes_match['error'][idx_max]
R1
0.07749058418374835
# R3: mean normalised error over the 3 biggest observed storms
idx_max = extremes_match['observed'].nlargest(3).index
R3 = extremes_match['error'][idx_max].mean()
R3
0.20076577921404407
extremes_match['error'].mean()
0.23428668143624762
let's build a function to calculate the storm metrics
def get_extremes_ts(ts1: pd.Series, ts2: pd.Series, quantile: float, cluster_duration:int = 72):
# first ts
threshold = ts1.quantile(quantile)
ext_ = get_extremes(ts1, "POT", threshold=threshold, r=f"{cluster_duration}h")
extremes1 = pd.DataFrame({"modeled" : ext_, "time_model" : ext_.index}, index=ext_.index)
# second ts
threshold = ts2.quantile(quantile)
ext_ = get_extremes(ts2, "POT", threshold=threshold, r=f"{cluster_duration}h")
extremes2 = pd.DataFrame({"observed" : ext_, "time_obs" : ext_.index}, index=ext_.index)
extremes = pd.concat([extremes1, extremes2], axis=1)
if extremes.empty:
return pd.DataFrame()
else:
extremes = extremes.groupby(pd.Grouper(freq='2D')).mean().dropna(how='all')
return extremes
def match_extremes(extremes: pd.DataFrame):
if extremes.empty:
return pd.DataFrame()
extremes_match = extremes.groupby(pd.Grouper(freq='2D')).mean().dropna()
if len(extremes_match) == 0:
return pd.DataFrame()
else:
extremes_match['difference'] = extremes_match['observed'] - extremes_match['modeled']
extremes_match['error'] = np.abs(extremes_match['difference']/extremes_match['observed'])
extremes_match['error_m'] = extremes_match["error"] * extremes_match['observed']
return extremes_match
def storm_metrics(ts1: pd.Series, ts2: pd.Series, quantile: float, cluster_duration:int = 72
)->Dict[str, float]:
extremes = get_extremes_ts(ts1, ts2, quantile, cluster_duration)
extremes_match = match_extremes(extremes)
if extremes_match.empty:
return {
"db_match" : np.nan,
"R1_norm": np.nan,
"R1": np.nan,
"R3_norm": np.nan,
"R3": np.nan,
"error": np.nan,
"error_metric": np.nan
}
else:
# R1: error (normalised and absolute) for the biggest observed storm
idx_max = extremes_match['observed'].idxmax()
R1_norm = extremes_match['error'][idx_max]
R1 = extremes_match['difference'][idx_max]
# R3: mean error (normalised and absolute) over the 3 biggest observed storms
idx_max = extremes_match['observed'].nlargest(3).index
R3_norm = extremes_match['error'][idx_max].mean()
R3 = extremes_match['difference'][idx_max].mean()
metrics = {
"db_match" : len(extremes_match)/len(extremes),
"R1_norm": R1_norm,
"R1": R1,
"R3_norm": R3_norm,
"R3": R3,
"error": extremes_match['error'].mean(),
"error_metric": extremes_match['error_m'].mean()
}
return metrics
get_extremes_ts(mod_, obs_, quantile=0.99, cluster_duration=72)
| date-time | modeled | time_model | observed | time_obs |
| --- | --- | --- | --- | --- |
| 2023-01-15 | 1.709692 | 2023-01-15 15:00:00 | 1.853305 | 2023-01-15 12:49:00 |
| 2023-01-29 | NaN | NaT | 1.240421 | 2023-01-30 13:27:00 |
| 2023-01-31 | 1.700684 | 2023-02-01 19:00:00 | NaN | NaT |
| 2023-02-16 | 1.499066 | 2023-02-17 17:00:00 | 1.061365 | 2023-02-17 15:47:00 |
| 2023-03-06 | NaN | NaT | 1.144401 | 2023-03-07 08:00:00 |
| 2023-03-12 | 1.389214 | 2023-03-13 18:00:00 | 1.068405 | 2023-03-13 19:17:00 |
| 2023-09-18 | 1.335437 | 2023-09-19 18:00:00 | 1.224318 | 2023-09-19 18:24:00 |
| 2023-10-14 | 1.694628 | 2023-10-14 16:00:00 | 1.468797 | 2023-10-14 18:33:00 |
| 2023-10-30 | NaN | NaT | 0.993046 | 2023-10-30 07:56:00 |
storm_metrics(mod_, obs_, quantile=0.99, cluster_duration=72)
{'db_match': 0.5555555555555556, 'R1_norm': 0.07749058418374835, 'R1': 0.14361372492432878, 'R3_norm': 0.10733416331904089, 'R3': -0.06444525713790306, 'error': 0.20693310584122138, 'error_metric': 0.2478145610799304}
storm_metrics(mod_, obs_, quantile=0.95, cluster_duration=72)
{'db_match': 0.45454545454545453, 'R1_norm': 0.07749058418374835, 'R1': 0.14361372492432878, 'R3_norm': 0.10733416331904089, 'R3': -0.06444525713790306, 'error': 0.17300368730738716, 'error_metric': 0.18139424685442088}
we can use get_extremes_ts or match_extremes to provide insightful info on the scatter plot
cmap_ = bp.Turbo256
def scatter_plot_raster(ts1: pd.Series, ts2: pd.Series, quantile: float, cluster_duration:int = 72, pp_plot: bool = False):
extremes = get_extremes_ts(ts1, ts2, quantile, cluster_duration)
extremes_match = match_extremes(extremes)
p = hv.Points((ts1.values, ts2.values))
sc_ = spread(rasterize(p)).opts(cmap=cmap_, cnorm='linear', alpha = 0.9, **scatter_view)
if extremes_match.empty:
ext_ = hv.Points((0,0))
else:
ext_ = hv.Points((extremes_match['modeled'].values, extremes_match['observed']),label=f"extremes").opts(size = 8, fill_color='r', line_color = 'k')
ax_plot = hv.Slope(1,0).opts(color='grey', show_grid=True)
lr = linregress(ts1, ts2)
lr_plot = hv.Slope(lr.slope,lr.intercept, label = f"y = {lr.slope:.2f}x + {lr.intercept:.2f}").opts(color='red',line_dash="dashed")
#
if pp_plot:
pc1, pc2 = get_percentiles(ts1, ts2,higher_tail=True)
ppp = hv.Scatter((pc1, pc2),('modeled', 'observed'), label=f"percentiles").opts(fill_color='g', line_color = 'b', size=10)
return ax_plot * lr_plot * sc_ * ext_ * ppp
else:
return ax_plot * lr_plot * sc_ * ext_
def scatter_plot(ts1: pd.Series, ts2: pd.Series, quantile: float, cluster_duration:int = 72, pp_plot: bool = False):
extremes = get_extremes_ts(ts1, ts2, quantile, cluster_duration)
extremes_match = match_extremes(extremes)
p = hv.Points((ts1.values, ts2.values))
sc_ = p.opts(alpha = 0.9, **scatter_view)
if extremes_match.empty:
ext_ = hv.Points((0,0))
else:
ext_ = hv.Points((extremes_match['modeled'].values, extremes_match['observed']),label=f"extremes").opts(size = 8, fill_color='r', line_color = 'k')
ax_plot = hv.Slope(1,0).opts(color='grey', show_grid=True)
lr = linregress(ts1, ts2)
lr_plot = hv.Slope(lr.slope,lr.intercept, label = f"y = {lr.slope:.2f}x + {lr.intercept:.2f}").opts(color='red',line_dash="dashed")
#
if pp_plot:
pc1, pc2 = get_percentiles(ts1, ts2)
ppp = hv.Scatter((pc1, pc2),('modeled', 'observed'), label=f"percentiles").opts(fill_color='g', line_color = 'k', alpha=0.9, size=8)
return ax_plot * lr_plot * sc_ * ext_ * ppp
else:
return ax_plot * lr_plot * sc_ * ext_
scatter_plot_raster(mod_, obs_, quantile=0.99, cluster_duration=72, pp_plot=True)
2 - Example for viti (Viti Levu, Fiji Islands)¶
ioc_code = 'viti'
obs = get_obs(ts_folder, ioc_code, '.parquet')
mod, slon, slat = get_model(model_file, ioc_code, surge_stations)
mod_, obs_ = align_ts(mod, obs)
stats_ = get_stats(mod_, obs_)
mod_plot = plot_extreme_raster(mod_, 0.99, color = 'blue', label='model')
obs_plot = plot_extreme_raster(obs_, 0.99, color ='red', label='obs')
mod_plot * obs_plot
scatter_plot_raster(mod_, obs_, quantile=0.99, cluster_duration=72)
as we can see on the graphs above, since no extreme event was recorded in 2023, the peaks in the modeled and observed TS don't match
storm_metrics(mod_, obs_, quantile=0.99, cluster_duration=72)
{'db_match': nan, 'R1_norm': nan, 'R1': nan, 'R3_norm': nan, 'R3': nan, 'error': nan, 'error_metric': nan}
all stations¶
now we can iterate over all stations and calculate the stats for the dataset
stats = {}
for v in versions.keys():
model_file = versions[v]
if v not in stats:
stats[v] = {}
for i_s, name in enumerate(surge_stations.Station_Name):
ioc_code = surge_stations.iloc[i_s].ioc_code
print(name, ioc_code)
obs = get_obs(ts_folder, ioc_code, '.parquet')
mod, mlon, mlat = get_model(model_file, ioc_code, surge_stations)
mod_, obs_ = align_ts(mod, obs)
stats_ = get_stats(mod_, obs_)
# try:
# storm metrics
metric99 = storm_metrics(mod_, obs_, quantile=0.99, cluster_duration=72)
metric95 = storm_metrics(mod_, obs_, quantile=0.95, cluster_duration=72)
# Create a dictionary for the current version's statistics
stats_["obs_lat"]= surge_stations.iloc[i_s].latitude
stats_["obs_lon"]= surge_stations.iloc[i_s].longitude
stats_["mod_lat"]= float(mlat)
stats_["mod_lon"]= float(mlon)
stats_["R1"] = metric99["R1"]
stats_["R1_norm"] = metric99["R1_norm"]
stats_["R3"] = metric99["R3"]
stats_["R3_norm"] = metric99["R3_norm"]
stats_["error99"] = metric99["error"]
stats_["error99m"] = metric99["error_metric"]
stats_["error95"] = metric95["error"]
stats_["error95m"] = metric95["error_metric"]
# Create a dictionary for the current version's statistics
stats[v][ioc_code] = stats_
# except Exception as e:
# print(e)
# continue
with open(f'stats_all.json', 'w') as f:
json.dump(json_format(stats), f, indent=2)
second part: visualisation of the results¶
open JSON
with open(f'stats_all.json') as f:
stats = json.load(f)
load world maritime areas from https://tomsail.github.io/static/renumber.html (no direct download possible)
oceans_ = gp.read_file('assets/world_oceans_final.json')
oceans_plot = oceans_.hvplot(color = 'name', alpha=0.5, height=800, width= 1200,cmap = 'glasbey', legend = False)
countries = gp.read_file(gp.datasets.get_path('naturalearth_lowres'))
map_ = countries.hvplot().opts(color='grey',line_alpha=0.9, tools=[])
good_obs = surge_stations[surge_stations.ioc_code.isin(stats['v0'].keys())]
obs_ = good_obs.hvplot.scatter(x = 'longitude', y = 'latitude', color = 'r', line_color='k', height=600, width= 1200, hover_cols=['ioc_code','Station_Name'])
oceans_plot * map_ * obs_
# Assuming oceans_ is already read and available as a GeoDataFrame and stations is the stations dataframe
def find_ocean_for_station(station, oceans_df, xstr = "longitude", ystr = "latitude"):
# Create a Point object for the station's location
point = gp.GeoSeries([shapely.Point(station[xstr], station[ystr])], crs="EPSG:4326")
# Check for each ocean if the point is within it, and return the ocean's name
for _, ocean in oceans_df.iterrows():
if point.within(ocean['geometry']).any():
return ocean['name']
return None
# Apply the function to the stations dataframe
surge_stations['ocean'] = surge_stations.apply(lambda station: find_ocean_for_station(station, oceans_), axis=1)
ocean_counts = surge_stations['ocean'].value_counts().reset_index()
ocean_counts.columns = ['ocean', 'count']
hv_ocean_counts = hv.Dataset(ocean_counts)
ocean_histogram = hv_ocean_counts.to(hv.Bars, 'ocean', 'count')
ocean_histogram.opts(opts.Bars(width=1000, height=500, show_grid=True, tools=['hover'], xrotation=45))
# Initial plot with the first version selected
initial_version = list(stats.keys())[0]
df = pd.DataFrame(stats[initial_version]).T.reset_index()
def scatter_hist(src, x, y, z):
p = hv.Points(src, kdims=[x,y]
).hist(num_bins=90, dimension=[x, y]).opts(
opts.Points(
show_title=False,
tools=['hover','box_select', 'tap'],
size = 10, color=z,
cmap="rainbow4",
line_color='k',
# line_='Category20',
# line_width=2,
# show_legend = False,
colorbar = True,
),
opts.Histogram(tools=['hover','box_select']),
# opts.Layout(shared_axes=True, shared_datasource=True, merge_tools=True,),
)
return p
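a minimal usage sketch, reusing the df built above (values come back from the JSON as strings, hence the float cast; column names are from the stats dictionary):
num_cols = ['obs_lon', 'obs_lat', 'rmse']
scatter_hist(df[num_cols].astype(float), 'obs_lon', 'obs_lat', 'rmse')  # colour the station map by RMSE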
# plot_all returns the observed/modeled extreme plots and the scatter plot for one station:
def plot_all(ioc_code, model_file, ts_folder):
# get obs
obs = get_obs(ts_folder, ioc_code, '.parquet')
mod, mlon, mlat = get_model(model_file, ioc_code, surge_stations)
mod_, obs_ = align_ts(mod, obs)
mod_plot = plot_extreme(mod_, 0.95, color = 'blue', label='model')
obs_plot = plot_extreme(obs_, 0.95, color ='red', label='obs')
corr_plot = scatter_plot(mod_, obs_, quantile=0.99, cluster_duration=72)
return obs_plot, mod_plot, corr_plot
def create_std_dev_circles(std_dev_range: np.ndarray) -> hv.Overlay:
std_dev_circles = []
for std in std_dev_range:
angle = np.linspace(0, np.pi/2, 100)
radius = np.full(100, std)
x = radius * np.cos(angle)
y = radius * np.sin(angle)
std_dev_circles.append(
hv.Curve((x, y)).opts(color='gray', line_dash='dotted', line_width=1)
)
return hv.Overlay(std_dev_circles)
def create_std_ref(radius: float) -> hv.Overlay:
angle = np.linspace(0, np.pi/2, 100)
x = radius * np.cos(angle)
y = radius * np.sin(angle)
return hv.Curve((x, y)).opts(color='gray', line_dash='dashed', line_width=2) * \
hv.Text(radius, 0., f'REF', halign='right', valign='bottom').opts(
text_font_size='10pt', text_color='gray')
def create_corr_lines(corr_range: np.ndarray, std_dev_max: float) -> hv.Overlay:
corr_lines = []
for corr in corr_range:
theta = np.arccos(corr)
radius = np.linspace(0, std_dev_max, 2)
x = radius * np.cos(theta)
y = radius * np.sin(theta)
corr_lines.append(
hv.Curve((x, y)).opts(color='blue', line_dash='dashed', line_width=1) *
hv.Text(x[-1], y[-1], f'{corr:.2f}', halign='left', valign='bottom').opts(
text_font_size='10pt', text_color='blue')
)
corr_label = hv.Text( 0.75 * std_dev_max, 0.75 * std_dev_max, f'Correlation Coefficient' ).opts( text_font_size='12pt', text_color='blue', angle=-45 )
return hv.Overlay(corr_lines) * corr_label
def create_rms_contours(standard_ref: float, std_dev_max: float, rms_range: np.ndarray, norm:bool) -> hv.Overlay:
rms_contours = []
for rms in rms_range:
angle = np.linspace(0, np.pi, 100)
x = standard_ref + rms * np.cos(angle)
y = rms * np.sin(angle)
inside_max_std = np.sqrt(x**2 + y**2) < std_dev_max
x[~inside_max_std] = np.nan
y[~inside_max_std] = np.nan
rms_contours.append(
hv.Curve((x, y)).opts(color='green', line_dash='dashed', line_width=1) *
hv.Text(standard_ref + rms * np.cos(2*np.pi/3), rms * np.sin(2*np.pi/3), f'{rms:.2f}', halign='left', valign='bottom').opts(
text_font_size='10pt', text_color='green')
)
label = "RMS %" if norm else "RMS"
rms_label = hv.Text( standard_ref, rms_range[1]*np.sin(np.pi/2), label, halign='left', valign='bottom' ).opts( text_font_size='11pt', text_color='green' )
return hv.Overlay(rms_contours) * rms_label
def taylor_diagram(df: pd.DataFrame,
norm: bool = True,
marker: str = "circle",
color: str = "black",
label: str = "Taylor Diagram"
) -> hv.Overlay:
if df.empty:
std_range = np.arange(0, 1.5, np.round(1/5, 2))
corr_range = np.arange(0, 1, 0.1)
rms_range = np.arange(0, 1.5, np.round(1/5, 2))
std_dev_overlay = create_std_dev_circles(std_range) * create_std_ref(1)
corr_lines_overlay = create_corr_lines(corr_range, std_range.max())
rms_contours_overlay = create_rms_contours(1, std_range.max(), rms_range, norm=norm)
return std_dev_overlay * corr_lines_overlay * rms_contours_overlay
theta = np.arccos(df['cr']) # Convert Cr to radians for polar plot
if norm:
std_ref = 1
std_mod = df['std_df1'] / df['std_df2']
else:
if len(df) > 1:
raise ValueError('for not normalised Taylor diagrams, you need only 1 data point')
        std_ref = df['std_df2'].mean()  # reference std dev = observations
        std_mod = df['std_df1'].mean()  # model std dev
#
std_range = np.arange(0, 1.5 * std_ref, np.round(std_ref/5, 2))
corr_range = np.arange(0, 1, 0.1)
rms_range = np.arange(0, 1.5 * std_ref, np.round(std_ref/5, 2))
std_dev_overlay = create_std_dev_circles(std_range) * create_std_ref(std_ref)
corr_lines_overlay = create_corr_lines(corr_range, std_range.max())
rms_contours_overlay = create_rms_contours(std_ref, std_range.max(), rms_range, norm=norm)
x = std_mod * np.cos(theta)
y = std_mod * np.sin(theta)
df['x'] = x
df['y'] = y
df['rms_perc'] = df['rms'] / df['std_df2']
# hover parameters
tooltips = [
    ('Bias', '@bias'),
    ('Corr Coef', '@cr'),
    ('RMSE (m)', '@rmse'),
    ('Centered RMS (m)', '@rms'),
    ('KGE', '@kge'),
    ('Std Dev Model (m)', '@std_df1'),
    ('Std Dev Measure (m)', '@std_df2'),
    ('Station', '@ioc_code'),
    ('Ocean', '@ocean'),
]
if norm:
tooltips.append(('RMS %', '@rms_perc'))
hover = HoverTool(tooltips=tooltips)
# Scatter plot for models with hover tool
scatter_plot = hv.Points(
df, ['x', 'y'],['cr', 'std_df1', 'std_df2', 'rms', 'rmse', 'rms_perc', 'ioc_code', 'ocean'],label=label
).opts(
color=color,
# cmap='Category20',
# line_color='k',
line_width=1,
marker = marker,
size=8,
tools=[hover],
default_tools=[],
show_legend=True,
hover_fill_color='firebrick',
xlim=(0, std_range.max()*1.05),
ylim=(0, std_range.max()*1.05),
# clabel = 'ocean'
)
# Combine all the elements
taylor_diagram = scatter_plot
return taylor_diagram
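a minimal standalone sketch: calling taylor_diagram with an empty frame draws only the reference geometry (std-dev arcs, correlation rays, RMS contours), onto which per-ocean point sets can be overlaid, as done in the dashboard below:
base = taylor_diagram(pd.DataFrame())  # reference axes only
# base * taylor_diagram(df_stats, norm=True, color='blue', label='v0')
# where df_stats (hypothetical) holds float columns 'cr', 'std_df1', 'std_df2', 'rms', 'rmse', 'ioc_code', 'ocean'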
def layout_fun(obs_, mod_, corr_):
tgt = (obs_ * mod_ ).opts(width=ts_view['width'], labelled=['y'], show_grid=True)
src = (obs_ * mod_ ).opts(width=ts_view['width'], height = 100, yaxis=None, default_tools=[])
RangeToolLink(src, tgt)
ts_plot = (src + tgt).cols(1)
#
corr_.opts(**scatter_view, show_grid=True)
return (ts_plot + corr_).cols(1)
def stacked_hist(plot, element):
"""found here https://discourse.holoviz.org/t/stacked-histogram/6205/2"""
offset = 0
for r in plot.handles["plot"].renderers:
r.glyph.bottom = "bottom"
data = r.data_source.data
new_offset = data["top"] + offset
data["top"] = new_offset
data["bottom"] = offset * np.ones_like(data["top"])
offset = new_offset
plot.handles["plot"].y_range.end = max(offset) * 1.1
plot.handles["plot"].y_range.reset_end = max(offset) * 1.1
def hist_(src,z, g = 'ocean', map = None):
if z in ['rmse', 'rms', 'bias']:
range_ = (0,0.5)
else:
range_ = (0,1)
df = src[[z,g]].reset_index()
#
unique_oceans = df[g].unique()
# Create a new DataFrame with one-hot encoded structure
rows = []
for index, row in df.iterrows():
new_row = {group: np.nan for group in unique_oceans}
new_row[row[g]] = row[z]
rows.append(new_row)
one_hot_df = pd.DataFrame(rows, columns=unique_oceans)
#
mean = src[z].mean()
color_key = hv.Cycle('Category20').values
# only way to get the colors to match the ocean mapping
if map is None:
map = {ocean: color_key[i % len(color_key)] for i, ocean in enumerate(unique_oceans)}
colors = [map[ocean] for ocean in unique_oceans]
return one_hot_df.hvplot.hist(
bins=20,
bin_range = range_,
# cmap = ocean_mapping,
color = colors,
).opts(
hooks=[stacked_hist],
**scatter_view,
title = f"{z} mean: {mean:.2f}")
# Initial ts plot
SURGE_FOLDER = "obs/surge/"
obs = pd.read_parquet(SURGE_FOLDER+"abed.parquet")
zeros = pd.Series(index=obs.index)
empt_ = hv.Curve(zeros)
empty_layout_ts = layout_fun(empt_, empt_, scatter_plot(zeros, zeros, 0.9))
# Define a class to encapsulate dashboard logic and state
import xarray as xr
class Dashboard(param.Parameterized):
version = param.Selector(objects=list(stats.keys()))
parameter = param.Selector(objects=['rmse', 'rms', 'bias', 'kge', 'nse2', 'R1_norm', 'R3_norm', 'mad', 'madp', 'error95', 'error99', 'R1', 'R3', 'error95m', 'error99m', 'std_df2', 'lamba', 'cr'])
selected_station = param.Integer(default=0)
def __init__(self, **params):
super().__init__(**params)
self.oceans_ = gp.read_file('assets/world_oceans_final.json')
self.df = pd.DataFrame()
self.ds = None
self.ocean_mapping = {}
self.update_data()
@param.depends('version', watch=True)
def update_data(self):
self.df = pd.DataFrame(stats[self.version]).T
self.df = self.df.astype(float)
self.df['ocean'] = self.df.apply(lambda station: find_ocean_for_station(station, oceans_, "obs_lon", "obs_lat"), axis=1)
# Create a color mapping for oceans
unique_oceans = self.df['ocean'].unique()
color_key = hv.Cycle('Category20').values
self.ocean_mapping = {ocean: color_key[i % len(color_key)] for i, ocean in enumerate(unique_oceans)}
# Apply the color mapping to the oceans map
self.map_ = self.oceans_[self.oceans_["name"].isin(unique_oceans)].hvplot(
color='name',
alpha=0.9,
**ts_view,
cmap=self.ocean_mapping, # Use the color mapping dictionary
tools=[],
legend=False
) * map_
self.df = self.df.dropna()
self.df['ioc_code'] = self.df.index
self.df.loc[self.df['nse'] > 0, 'nse2'] = self.df['nse']
self.df.loc[self.df['nse'] < 0, 'nse2'] = 0
self.file_ds = f"{versions[self.version]}"
self.ds = xr.open_dataset(f"{versions[self.version]}")
def update_ts(self, index):
if not index:
return empty_layout_ts
else:
station_name = self.df.iloc[index[0]]['ioc_code']  # the 'ioc_code' column holds the station codes
obs_, mod_, corr_ = plot_all(station_name, self.file_ds, SURGE_FOLDER)
layout = layout_fun(obs_, mod_, corr_)
return layout
@param.depends('version', 'parameter')
def view(self):
# Update the DataFrame based on the selected version
self.update_data()
# Your plotting logic here (simplified for this example)
scatter_ = scatter_hist(self.df, 'obs_lon', 'obs_lat', self.parameter)
stream = streams.Selection1D(source=scatter_[0])
time_series = hv.DynamicMap(self.update_ts, streams=[stream])
# Update the layout with new plots
layout = pn.Column(self.map_ * scatter_, time_series.opts(shared_axes=False))
# layout = pn.Column(scatter_)
return layout
@param.depends('version')
def taylor(self):
self.update_data()
diagram = taylor_diagram(pd.DataFrame())
for ocean in self.ocean_mapping.keys():
df = self.df[self.df['ocean'] == ocean]
diagram *= taylor_diagram(df, norm=True, color=self.ocean_mapping[ocean], label=ocean)
return diagram.opts(**scatter_view, shared_axes=False)
@param.depends('version', 'parameter')
def hist(self):
# Update the DataFrame based on the selected version
self.update_data()
hist = hist_(self.df, self.parameter, g = 'ocean', map = self.ocean_mapping)
return hist.opts(shared_axes=False)
# Instantiate the dashboard and create the layout
dashboard = Dashboard()
layout = pn.Row(pn.Column(pn.Row(dashboard.param.version,
dashboard.param.parameter),
dashboard.view),
pn.Column(dashboard.taylor, dashboard.hist))
# Serve the Panel app
pn.serve(layout)
Launching server at http://localhost:34605
<panel.io.server.Server at 0x7b693cc040a0>