Tidal Analysis Comparison: UTide vs PyTides¶

In this notebook, we'll look at ways of separating:

  • the storm surge (the meteorologically-driven component)
  • from the astronomical tide (the predictable gravitational component).
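
Conceptually, the observed water level is the sum of these two components, so once the astronomical tide is reconstructed, the surge follows by subtraction:

$$ \eta_{obs}(t) = \eta_{tide}(t) + \eta_{surge}(t) \quad \Rightarrow \quad \eta_{surge}(t) = \eta_{obs}(t) - \eta_{tide}(t) $$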

Study Location¶

We've chosen Roscoff, France as our test case: it has some of the largest tidal ranges in Europe (up to 9 meters). The data comes from the IOC database (station rosc) and was extracted using searvey.

The data and tools compared:¶

We'll evaluate the following libraries for the (de)tiding analysis:

  1. UTide - the Python port of the MATLAB package
  2. pytides2 - a fork of the original pytides repository that works with recent versions of Python
  3. FES2022 - we use this model as a reference

0. Setting up functions, tools and data¶

First, let's import the libraries we'll need. Each serves a specific purpose in our tidal detective work:

In [2]:
import glob
import hvplot.pandas
import hvplot.xarray
import numpy as np
import os
import pandas as pd
import pyfes
import searvey
import utide
import xarray as xr

from pytides2.tide import Tide

import ioc_cleanup as C

We define the FES_CONSTITUENTS - these are the tidal components included in the FES 2022 model, representing the most important tidal harmonics globally.

We simply reordered the constituents according to their frequencies and importance.

Global variables¶

In [3]:
UTIDE_OPTS = {
    "constit": "auto", 
    "method": "ols", 
    "order_constit": "frequency",
    "Rayleigh_min": 0.97,  # High threshold for constituent resolution
    "verbose": False
}

FES_CONSTITUENTS = [
    "M2", "S2", "N2", "K2", "2N2", "L2", "T2", "R2", "NU2", "MU2", "EPS2", "LAMBDA2", # Semi-diurnal (twice daily)
    "K1", "O1", "P1", "Q1", "J1", "S1", # Diurnal (once daily)
    "MF", "MM", "MSF", "SA", "SSA", "MSQM", "MTM", # Long period (fortnightly to annual)
    "M4", "MS4", "M6", "MN4", "N4", "S4", "M8", "M3", "MKS2" # Short period (higher harmonics)
]

DATA_FOLDER = "data"

# for FES time series reconstruction
START = np.datetime64('2022-01-01T00:00:00')
END = np.datetime64("2022-04-01T00:00:00")
STEP = np.timedelta64(20, "m")
DATES = np.arange(START, END+STEP, STEP)

FES_M2 = xr.open_dataset("/home/tomsail/work/FES/load_tide/m2_fes2022.nc")

functions¶

In [4]:
def data_availability(series: pd.Series, freq="60min") -> float:
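    """Return the fraction of non-NaN bins after resampling the series to `freq`."""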
    resampled = series.resample(freq).mean()
    data_avail_ratio = 1 - resampled.isna().sum() / len(resampled)
    return float(data_avail_ratio)

Utide wrappers¶

In [5]:
def utide_get_coefs(ts: pd.Series, lat: float, resample: int = None) -> dict:
    opts = {**UTIDE_OPTS, "lat": lat}  # copy so we don't mutate the global options
    if resample is not None:
        ts = ts.resample(f"{resample}min").mean()
        ts = ts.shift(freq=f"{resample / 2}min")  # Center the resampled points
    return utide.solve(ts.index, ts, **opts)


def utide_surge(ts: pd.Series, lat: float, resample: int = None) -> pd.Series:
    ts0 = ts.copy()
    coef = utide_get_coefs(ts, lat, resample)
    tidal = utide.reconstruct(ts0.index, coef, verbose = UTIDE_OPTS["verbose"])
    return pd.Series(data=ts0.values - tidal.h, index = ts0.index)


def utide_to_df(utide_coef: utide.utilities.Bunch) -> pd.DataFrame:
    return pd.DataFrame({ 
        "amplitude": utide_coef["A"],
        "phase": utide_coef["g"],
        "amplitude_CI": utide_coef["A_ci"],
        "phase_CI": utide_coef["g_ci"]
    }, index=utide_coef["name"])
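
The resample-and-center trick used in these wrappers deserves a quick note: pandas labels each resampled bin by its left edge, so we shift the series by half a bin to place each value at the bin centre. A minimal sketch on synthetic data (toy values, reusing the pandas import above):

idx = pd.date_range("2022-01-01", periods=60, freq="1min")
s = pd.Series(range(60), index=idx, dtype=float)
rs = s.resample("20min").mean()        # labels fall on the left edges: 00:00, 00:20, 00:40
centered = rs.shift(freq="10min")      # labels moved to the bin centres: 00:10, 00:30, 00:50
print(centered.index.minute.tolist())  # [10, 30, 50]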

pytides wrappers¶

In [6]:
def pytide_get_coefs(ts: pd.Series, resample: int = None) -> dict:
    if resample is not None:
        ts = ts.resample(f"{resample}min").mean()
        ts = ts.shift(freq=f"{resample / 2}min")  # Center the resampled points
    ts = ts.dropna()
    return Tide.decompose(ts, ts.index.to_pydatetime())[0]


def pytides_surge(ts: pd.Series, resample: int = None) -> pd.Series:
    ts0 = ts.copy()
    tide = pytide_get_coefs(ts, resample)
    t0 = ts.index.to_pydatetime()[0]
    hours = (ts.index - ts.index[0]).total_seconds()/3600
    times = Tide._times(t0, hours)
    return pd.Series(ts0.values - tide.at(times), index=ts0.index)

def pytides_to_df(pytides_tide: Tide)-> pd.DataFrame:
    constituent_names = [c.name.upper() for c in pytides_tide.model['constituent']]
    return pd.DataFrame(pytides_tide.model, index=constituent_names).drop('constituent', axis=1)

FES / pyfes wrappers¶

In [7]:
def load_fes_yaml(path):
    if os.path.exists(path):
        return path
    else:
        raise ValueError(f"FES Yaml not found in {path}")

def get_pyfes_cfg(yaml_file):
    cfg = pyfes.load_config(load_fes_yaml(yaml_file))
    return cfg


def reduce_coef_to_fes(df: pd.DataFrame, cnst: list, verbose: bool = False):
    res = pd.DataFrame(0.0, index=cnst, columns=df.columns)
    common_constituents = df.index.intersection(cnst)
    res.loc[common_constituents] = df.loc[common_constituents]
    
    not_in_fes_df = df[~df.index.isin(cnst)]
    not_in_fes = not_in_fes_df.index.tolist()
    not_in_fes_amps = not_in_fes_df["amplitude"].round(3).tolist()
    missing_fes = set(cnst) - set(df.index)
    
    if verbose:
        print(f"Constituents found but not in FES: {not_in_fes}")
        print(f"Their amplitudes: {not_in_fes_amps}")
        if missing_fes:
            print(f"FES constituents missing from analysis (set to 0): {sorted(missing_fes)}")
    
    return res


def get_constituent_name(constituent_enum):
    name = constituent_enum.name
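    # pyfes enum names carry a leading "k" (e.g. kM2); strip it and uppercase the rest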
    if name.startswith('k'):
        name = name[1:].upper()
    return name


def get_constituents_fes(cfg, lon, lat):
    tide_dict = cfg['tide'].interpolate([lon], [lat])[0]
    result = {}
    for constituent, value_array in tide_dict.items():
        value = value_array[0]
        amplitude = np.abs(value)
        phase = np.mod(np.rad2deg(np.angle(value)), 360)
        name = get_constituent_name(constituent)
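        # FES amplitudes are in centimetres; convert to metres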
        result[name] = {
            'amplitude': amplitude/100,
            'phase': phase
        }
    return pd.DataFrame(result).T


def fes_reconstruct(lon, lat, dates, cfg, num_threads=1):
    lons = np.full(dates.shape, lon)
    lats = np.full(dates.shape, lat)
    tide, lp, qc = pyfes.evaluate_tide(cfg['tide'], dates, lons, lats, num_threads=num_threads)
    load, load_lp, qc_lp = pyfes.evaluate_tide(cfg['radial'], dates, lons, lats, num_threads=num_threads)
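    # geocentric tide = ocean tide + load tide + long-period equilibrium tide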
    geocentric_tide = tide + load + lp 
    df = pd.DataFrame(
        {
            "tide": tide,
            "lp": lp,
            "qc": qc,
            "load": load,
            "load_lp": load_lp,
            "qc_lp": qc_lp,
            "geocentric": geocentric_tide,
        },
        index=dates,
    )
    df.attrs["lon"] = lon
    df.attrs["lat"] = lat
    return df["geocentric"]

Comparison wrappers¶

In [8]:
def sim_on_obs(sim, obs):
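    """Interpolate the simulated series onto the observation timestamps."""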
    obs = pd.Series(obs, name="obs")
    sim = pd.Series(sim, name="sim")
    df = pd.merge(sim, obs, left_index=True, right_index=True, how="outer")
    df["sim"] = df["sim"].interpolate(method="linear", limit_direction="both")
    df = df.dropna(subset=["obs"])
    sim_ = df["sim"].drop_duplicates()
    obs_ = df["obs"].drop_duplicates()
    return sim_, obs_


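# composite skill score in [0, 1]: negative correlations are clipped to 0 and the RSS penalty is capped at 1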
def compute_score(corr: float, rss: float) -> float:
    return np.max([0, corr]) * (1 - np.min([rss, 1]))

def concat_tides_constituents(dict_tides):
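    """Stack the per-method constituent tables into a (constituent, method) MultiIndex, ordered like FES_CONSTITUENTS (reversed, for the bar plots)."""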
    multi_df = pd.concat(dict_tides)
    multi_df.index.names = ['method', 'constituent']
    multi_df = multi_df.swaplevel().sort_index()

    available_constituents = multi_df.index.get_level_values('constituent').unique()
    filtered_order = [c for c in FES_CONSTITUENTS if c in available_constituents][::-1]
    return  multi_df.reindex(filtered_order, level='constituent')

def get_tidal_ts(station, ioc_df, fes_cfg, rsp = 20):
    _lon = ioc_df[ioc_df.ioc_code == station].lon.values[0]
    _lat = ioc_df[ioc_df.ioc_code == station].lat.values[0]
    obs_file = glob.glob(f"data/{station}_*.parquet")[0]
    ts = pd.read_parquet(obs_file)
    ts = ts[ts.columns[0]]
    # constituents
    ut = utide_get_coefs(ts, _lat, resample=rsp)
    utide_reduced_coef = reduce_coef_to_fes(utide_to_df(ut), FES_CONSTITUENTS)
    pt = pytide_get_coefs(ts, rsp)
    pytides_reduced_coef = reduce_coef_to_fes(pytides_to_df(pt), FES_CONSTITUENTS)
    fes_df = get_constituents_fes(fes_cfg, _lon, _lat)
    tide_ = concat_tides_constituents({
        "fes":fes_df, 
        "utide":utide_reduced_coef, 
        "pytides": pytides_reduced_coef
    })
    # time domain
    ts_fes = fes_reconstruct(lon=_lon, lat=_lat, dates=DATES, cfg=fes_cfg)
    ts_fes_obs, _ = sim_on_obs(ts_fes, ts.loc[START:END])
    df_obs_fes = pd.concat({
        "fes": ts_fes_obs/100, 
        "obs": ts.loc[START: END], 
        }, axis=1)
    return tide_, df_obs_fes

study site

In [9]:
station = "rosc"
sensor = "rad"

get 25 years of data

In [10]:
raw = searvey.fetch_ioc_station( 
    station, 
    "2000-01-01", 
    pd.Timestamp.now()
)
raw.describe()
Out[10]:
                rad
count  7.679029e+06
mean   4.699290e+00
std    8.233551e+00
min   -9.999990e+01
25%    3.528500e+00
50%    5.369500e+00
75%    7.094600e+00
max    9.985000e+00

Station Metadata

In [11]:
# Get station metadata
ioc_df = searvey.get_ioc_stations()
_lat = ioc_df[ioc_df.ioc_code == station].lat.values[0]

station_info = ioc_df[ioc_df.ioc_code == station]
print(f"Station: {station_info['location'].values[0]}")
print(f"Latitude: {_lat:.4f}°N")
print(f"Longitude: {station_info['lon'].values[0]:.4f}°E")
print(f"Country: {station_info['country'].values[0]}")
Station: Roscoff
Latitude: 48.7190°N
Longitude: -3.9660°E
Country: France

let's clean the data using ioc_cleanup

In [12]:
!mkdir -p transformations
import requests
response = requests.get(f'https://raw.githubusercontent.com/seareport/ioc_cleanup/refs/heads/ioc_2024/transformations/{station}_{sensor}.json')
with open(f'transformations/{station}_{sensor}.json', 'wb') as f:
    f.write(response.content)
Out[12]:
34930

here's a snapshot of the cleaning transformation file

In [13]:
! head -20 "transformations/{station}_{sensor}.json" 
{
  "ioc_code": "rosc",
  "sensor": "rad",
  "notes": "",
  "skip": false,
  "wip": false,
  "start": "2022-01-01T00:00:00",
  "end": "2025-01-01T00:00:00",
  "high": null,
  "low": null,
  "dropped_date_ranges": [
    ["2022-03-27T03:00:00", "2022-03-27T03:59:00"],
    ["2022-12-02T13:35:00", "2022-12-02T13:39:00"],
    ["2023-03-26T03:00:00", "2023-03-26T03:59:00"]
  ],
  "dropped_timestamps": [
    "2022-09-01T01:57:00",
    "2023-05-01T09:03:00",
    "2023-05-01T09:04:00",
    "2023-05-01T09:05:00",

Now let's apply these transformations to clean our data:

In [14]:
# Load and apply quality control transformations
trans = C.load_transformation(station, sensor)
cleaned_data = C.transform(raw, trans)
ts = cleaned_data[sensor]

print(f"Data cleaning complete!")
print(f"Original data points: {len(raw)}")
print(f"Cleaned data points: {len(ts)}")
print(f"Data availability: {data_availability(ts):.1%}")
print(f"Time range raw: {raw.index.min()} to {raw.index.max()}")
print(f"Time range clean: {ts.index.min()} to {ts.index.max()}")
Data cleaning complete!
Original data points: 7679029
Cleaned data points: 1548102
Data availability: 99.6%
Time range raw: 2009-09-17 23:56:24 to 2025-07-17 14:35:00
Time range clean: 2022-01-01 00:01:00 to 2025-01-01 00:00:00

1: UTide Analysis¶

In [15]:
out = utide_get_coefs(ts, _lat, resample=20)
print(f"Found {len(out['name'])} tidal constituents")
Found 68 tidal constituents

Let's organize the UTide results into a clean DataFrame:

In [16]:
print("Top 20 tidal constituents by amplitude (UTide):")
print(utide_to_df(out).sort_values('amplitude', ascending=False).head(20))
Top 20 tidal constituents by amplitude (UTide):
      amplitude       phase  amplitude_CI   phase_CI
M2     2.698727  141.712703      0.003081   0.065412
S2     1.015954  187.427755      0.003081   0.173769
N2     0.533240  123.751469      0.003081   0.331070
K2     0.291735  184.513174      0.003081   0.605060
MU2    0.141442  150.748488      0.003081   1.248147
M4     0.119954  140.948298      0.001153   0.550953
NU2    0.100100  111.967316      0.003081   1.763576
L2     0.096260  138.833846      0.003081   1.833901
2N2    0.088861  113.525695      0.003081   1.986781
K1     0.081898   81.700949      0.001277   0.894096
MS4    0.081814  191.957568      0.001153   0.807797
O1     0.074173  333.543720      0.001278   0.986714
SA     0.060349  304.358930      0.016419  15.386364
T2     0.054464  175.609556      0.003081   3.241401
MN4    0.047274  113.279430      0.001153   1.397993
LDA2   0.046476  110.731276      0.003081   3.798172
SSA    0.046149   79.602077      0.016350  20.206857
EPS2   0.035734  129.866658      0.003081   4.940098
MSN2   0.029038  332.005902      0.003081   6.079230
P1     0.028376   72.451468      0.001278   2.579763

2: PyTides Analysis¶

In [17]:
out_pytides = pytide_get_coefs(ts, 20)
/home/tomsail/work/gist/tide_best_practice/.venv/lib/python3.11/site-packages/pytides2/tide.py:438: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  heights = heights[sort]
In [18]:
out_pytides = pytide_get_coefs(ts, 20)
print(f"Found {len(out_pytides.model['constituent'])} tidal constituents")
Found 38 tidal constituents

Let's organize the PyTides results:

In [19]:
print("Top 20 tidal constituents by amplitude (PyTides):")
print(pytides_to_df(out_pytides).sort_values('amplitude', ascending=False).head(20))
Top 20 tidal constituents by amplitude (PyTides):
         amplitude       phase
Z0        5.366716    0.000000
M2        2.696960  141.810166
S2        1.018143  187.379524
N2        0.533421  123.834273
K2        0.292225  184.647714
MU2       0.143417  148.931139
M4        0.119931  141.134845
NU2       0.099544  112.237101
L2        0.096016  138.895029
2N2       0.083984  106.947072
K1        0.081825   81.773947
MS4       0.081581  192.032615
O1        0.074254  333.333368
SA        0.068742  222.775933
T2        0.054274  175.715530
SSA       0.050564   82.881197
MN4       0.047301  113.398872
LAMBDA2   0.046093  111.441733
2SM2      0.035839    5.226947
P1        0.028065   72.881608

3: FES¶

In [20]:
fes_cfg = get_pyfes_cfg("fes2022.yaml")
In [21]:
fes_df = get_constituents_fes(fes_cfg, station_info['lon'].values[0], station_info['lat'].values[0])

4: Comparison¶

To fairly compare UTide and pytides results, we'll standardize them against the FES 2022 constituent list. This will show us:

  1. Which constituents each method found
  2. Which constituents are missing from each analysis
  3. How the amplitudes compare for common constituents
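
As a minimal sketch of what reduce_coef_to_fes does (toy amplitudes, made up for illustration):

toy = pd.DataFrame({"amplitude": [2.70, 0.03], "phase": [141.7, 12.0]}, index=["M2", "MSN2"])
reduce_coef_to_fes(toy, FES_CONSTITUENTS, verbose=True)
# M2 is kept as-is; MSN2 is absent from FES_CONSTITUENTS, so it is reported and dropped;
# every FES constituent missing from the analysis (S2, N2, ...) is filled with amplitude 0.
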
In [22]:
pytides_reduced_coef = reduce_coef_to_fes(pytides_to_df(out_pytides), FES_CONSTITUENTS)
pytides_reduced_coef.head(10)

utide_reduced_coef = reduce_coef_to_fes(utide_to_df(out), FES_CONSTITUENTS)
utide_reduced_coef.head(10)

fes_df.head(10)
Out[22]:
     amplitude       phase
M2    2.696960  141.810166
S2    1.018143  187.379524
N2    0.533421  123.834273
K2    0.292225  184.647714
2N2   0.083984  106.947072
L2    0.096016  138.895029
T2    0.054274  175.715530
R2    0.007773  221.185632
NU2   0.099544  112.237101
MU2   0.143417  148.931139
Out[22]:
     amplitude       phase  amplitude_CI   phase_CI
M2    2.698727  141.712703      0.003081   0.065412
S2    1.015954  187.427755      0.003081   0.173769
N2    0.533240  123.751469      0.003081   0.331070
K2    0.291735  184.513174      0.003081   0.605060
2N2   0.088861  113.525695      0.003081   1.986781
L2    0.096260  138.833846      0.003081   1.833901
T2    0.054464  175.609556      0.003081   3.241401
R2    0.006671  223.102025      0.003081  26.463453
NU2   0.100100  111.967316      0.003081   1.763576
MU2   0.141442  150.748488      0.003081   1.248147
Out[22]:
      amplitude       phase
MM     0.008214  192.569726
MF     0.010752  189.900280
MTM    0.001946  183.984763
MSQM   0.000064  211.482854
Q1     0.021393  285.725126
O1     0.073378  331.900683
P1     0.026480   71.327258
S1     0.005409   54.803715
K1     0.081319   81.104705
J1     0.003529  133.804127

visual comparison¶

What to look for:

  • Major constituents (M2, S2, N2, K1, O1) should have similar amplitudes
  • Minor constituents may show more variation between methods
  • Missing constituents appear as zero amplitude in one method but not the other
In [23]:
tide_ = concat_tides_constituents({
    "fes":fes_df, 
    "utide":utide_reduced_coef, 
    "pytides": pytides_reduced_coef
})
tide_
Out[23]:
                     amplitude       phase  amplitude_CI   phase_CI
constituent method
MKS2        fes       0.015689  259.114289           NaN        NaN
            pytides   0.000000    0.000000           NaN        NaN
            utide     0.013785  234.094592      0.003081  12.806398
M3          fes       0.002879  239.493529           NaN        NaN
            pytides   0.014143   86.524063           NaN        NaN
...                        ...         ...           ...        ...
S2          pytides   1.018143  187.379524           NaN        NaN
            utide     1.015954  187.427755      0.003081   0.173769
M2          fes       2.699484  140.653826           NaN        NaN
            pytides   2.696960  141.810166           NaN        NaN
            utide     2.698727  141.712703      0.003081   0.065412

102 rows × 4 columns

In [24]:
def plot_comparative_amplitudes(df, station):
    return df.amplitude.hvplot.barh(
        ylabel="Tidal Constituent",
        xlabel="Amplitude (meters)", 
        by="method", 
        grid=True,
        title=f"Tidal Amplitudes: UTide vs PyTide, station {station}",
        legend='top_right',
        rot=90
    ).opts(
        height=1000, 
        width=1000,
        fontsize={'title': 15, 'labels': 12, 'xticks': 8, 'yticks': 8}
    )

plot_comparative_amplitudes(tide_, station)
Out[24]:

Quantitative comparison¶

we'll assess the RSS over all the constituents of each method, taking FES as the reference:

RSS is given by: [1]

$$ \operatorname{RSS} = \sum_{i=1}^{n} \left(A_{method,i} - A_{FES,i}\right)^2 $$

In [25]:
def compute_rss(df:pd.DataFrame, param:str, a:str, b:str):
    df_ = df[param].unstack(level='method')
    df_["rss"] = (df_[a] - df_[b])**2
    return df_["rss"].sum()


print(f"utide rss for {station} is {compute_rss(tide_, 'amplitude', 'utide', 'fes'):.3f}")
print(f"pytides rss for {station} is {compute_rss(tide_, 'amplitude', 'pytides', 'fes'):.3f}")
utide rss for rosc is 0.009
pytides rss for rosc is 0.009

we'll iterate through an existing folder containing cleaned data at tide gauge locations.

In [ ]:
res = {}
for path in glob.glob("data/*parquet"): 
    ts = pd.read_parquet(path)
    ts = ts[ts.columns[0]]
    root, file_ext = os.path.split(path)
    file, ext = os.path.splitext(file_ext)
    station, sensor = file.split("_")
    _lon = ioc_df[ioc_df.ioc_code == station].lon.values[0]
    _lat = ioc_df[ioc_df.ioc_code == station].lat.values[0]
    try: 
        # constituents
        ut = utide_get_coefs(ts, _lat, resample=20)
        utide_reduced_coef = reduce_coef_to_fes(utide_to_df(ut), FES_CONSTITUENTS)
        pt = pytide_get_coefs(ts, 20)
        pytides_reduced_coef = reduce_coef_to_fes(pytides_to_df(pt), FES_CONSTITUENTS)
        fes_df = get_constituents_fes(fes_cfg, _lon, _lat)
        tide_ = concat_tides_constituents({
            "fes":fes_df, 
            "utide":utide_reduced_coef, 
            "pytides": pytides_reduced_coef
        })
        rss_utide = compute_rss(tide_, "amplitude", "utide", "fes")
        rss_pytides = compute_rss(tide_, "amplitude", "pytides", "fes")
        # time domain
        ts_fes = fes_reconstruct(lon=_lon, lat=_lat, dates=DATES, cfg=fes_cfg)
        ts_fes_obs, _ = sim_on_obs(ts_fes, ts.loc[START:END])
        df_sim_obs_fes = pd.concat({
            "fes": ts_fes_obs/100, 
            "obs": ts.loc[START: END], 
            }, axis=1)
        corr_matrix = df_sim_obs_fes.corr(method="pearson")
        corr_fes = corr_matrix.loc["fes", "obs"]
        score_utide = compute_score(corr_fes, float(rss_utide))
        score_pytides = compute_score(corr_fes, float(rss_pytides))
        # results for station
        res[station] = {
            "ioc_code": station,
            "lat": _lat,
            "lon": _lon,
            "rss_utide": rss_utide,
            "rss_pytides": rss_pytides,
            "corr_fes": corr_fes,
            "score_utide": score_utide,
            "score_pytides": score_pytides
        }
        # print(f"rss for {station} is {rss:.4f}")
    except Exception as e:
        print(f"couldn't process {station}: {e}")
In [27]:
rss_df = pd.DataFrame(res).T
rss_df.score_utide = rss_df.score_utide.astype(float)
rss_df.score_pytides = rss_df.score_pytides.astype(float)
rss_df.rss_utide = rss_df.rss_utide.astype(float)
rss_df.rss_pytides = rss_df.rss_pytides.astype(float)
rss_df["corr_fes"] = rss_df["corr_fes"].astype(float)
rss_df
Out[27]:
      ioc_code      lat      lon  rss_utide  rss_pytides  corr_fes  score_utide  score_pytides
ferg      ferg  -19.277  147.058   0.011815     0.011191  0.996797     0.985020       0.985642
lame2    lame2   18.318  -64.724   0.003215     0.003062  0.987133     0.983960       0.984110
nshi      nshi    55.01    -1.44   0.008610     0.009202  0.998569     0.989972       0.989380
kawa      kawa   20.036 -155.832   0.001984     0.003518  0.982265     0.980316       0.978809
stqy2    stqy2   48.648   -2.822   0.012862     0.011067  0.998938     0.986090       0.987883
...        ...      ...      ...        ...          ...       ...          ...            ...
bang      bang    54.66    -5.67   0.014733     0.016029  0.995949     0.981275       0.979985
ilfa      ilfa    51.21    -4.11   0.013892     0.012549  0.998642     0.984769       0.986110
trst      trst  -10.586  142.222   0.016674     0.017837  0.987406     0.970942       0.969793
cwfl      cwfl   27.978  -82.832   0.010662     0.010547  0.992807     0.982222       0.982336
fpnt      fpnt   37.807 -122.465   0.003382     0.004311  0.996685     0.993314       0.992389

377 rows × 8 columns

Visualising the RSS between UTide/pytides and FES¶

In [28]:
rss_df.hvplot.points(
    x="lon",
    y="lat",
    c="rss_utide",
    hover_cols = ['ioc_code'],
    s=100,
    geo = True,
    tiles = True,
    clim=(0,0.2)
).opts(width = 1000, height = 800, title = "RSS difference between UTide and FES constituents")

rss_df.hvplot.points(
    x="lon",
    y="lat",
    c="rss_pytides",
    hover_cols = ['ioc_code'],
    s=100,
    geo = True,
    tiles = True,
    clim=(0,0.2)
).opts(width = 1000, height = 800, title = "RSS difference between pytides and FES constituents")
Out[28]:
Out[28]:

The RSS between the FES and observed constituents looks very small globally.

Let's look at the 2 worst stations: anch2 and live.

The rest of the stations do quite well, with less than 0.2 m² RSS.

In [29]:
station= "anch2"

tide_, ts_fes_obs = get_tidal_ts(station, ioc_df, fes_cfg)
plot_comparative_amplitudes(tide_, station) + ts_fes_obs.resample("20min").mean().shift(freq="10min").hvplot().opts(height=800)
/home/tomsail/work/gist/tide_best_practice/.venv/lib/python3.11/site-packages/pytides2/tide.py:438: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  heights = heights[sort]
Out[29]:

Anchorage in Alaska is deep in a fjord.

The tidal wave does not seem to be attenuated enough in the FES2022 model.

Here is a representation of the M2 constituent in Alaska:

In [30]:
M2 = FES_M2  # reuse the dataset opened in the global variables section
M2_subset = M2.sel(lon=slice(205, 215),lat=slice(58,62))
(M2_subset.amplitude.hvplot.image(
    geo=True,
    alpha=0.9,
    tiles=True,
    cmap='rainbow4',
    clim = (0,3)
)*M2_subset.phase.hvplot.contour(
    geo=True,
)*ioc_df[ioc_df.ioc_code == station].hvplot.points(
    x="lon",
    y="lat", 
    geo=True,
    color = "r",
    line_color='k',
    s=100,
    hover_cols="ioc_code"
)).opts(width=1200, height=800)
Out[30]:
In [31]:
station= "live"

tide_, ts_fes_obs = get_tidal_ts(station, ioc_df, fes_cfg)
plot_comparative_amplitudes(tide_, station) + ts_fes_obs.resample("20min").mean().shift(freq="10min").hvplot().opts(height=800)
/home/tomsail/work/gist/tide_best_practice/.venv/lib/python3.11/site-packages/pytides2/tide.py:438: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  heights = heights[sort]
Out[31]:

Here, on the contrary, the tidal wave seems too attenuated:

In [32]:
M2_subset = M2.sel(lon=slice(350, 360),lat=slice(52,56))
(M2_subset.amplitude.hvplot.image(
    geo=True,
    alpha=0.7,
    tiles=True,
    cmap='rainbow4',
    clim = (0,3)
)*M2_subset.phase.hvplot.contour(
    geo=True,
)*ioc_df[ioc_df.ioc_code == station].hvplot.points(
    x="lon",
    y="lat", 
    geo=True,
    color = "r",
    line_color='k',
    s=100,
    hover_cols="ioc_code"
)).opts(width=1200, height=800)
Out[32]:

Visualisation of the correlation¶

In [44]:
rss_df.hvplot.points(
    x="lon",
    y="lat",
    c="corr_fes",
    hover_cols = ['ioc_code'],
    s=100,
    cmap="rainbow4_r",
    geo = True,
    tiles = True,
).opts(width = 1000, height = 800, title = "correlation between obs and FES reconstructed signal")
Out[44]:

Globally, the correlation between FES and the tidal-only signal from tide gauges is very good.

We can have a look at what is going on in the Baltic.

In [34]:
station= "furu"

tide_, ts_fes_obs = get_tidal_ts(station, ioc_df, fes_cfg)
plot_comparative_amplitudes(tide_, station) + ts_fes_obs.resample("20min").mean().shift(freq="10min").hvplot().opts(height=800)
/home/tomsail/work/gist/tide_best_practice/.venv/lib/python3.11/site-packages/pytides2/tide.py:438: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  heights = heights[sort]
Out[34]:

The biggest difference is in the SA/SSA constituents, which have periods of 12 and 6 months respectively.

The preliminary conclusion is that FES is not able to account for these long-period constituents in its T-UGO model.

comparison between tidal residuals¶

If both methods work correctly, the tidal residual time series - corresponding to the meteorological component - should be very similar.

Significant differences would indicate problems with UTide, pytides, or both.

In [ ]:
# we need to take a "total water level" observed signal
file = glob.glob(f"twl/{station}*.parquet")[0]
ts = pd.read_parquet(file)
Out[ ]:
time
2022-01-01 00:01:00   -0.125744
2022-01-01 00:02:00   -0.119744
2022-01-01 00:03:00   -0.115744
2022-01-01 00:04:00   -0.119744
2022-01-01 00:05:00   -0.123744
                         ...   
2024-12-30 23:56:00    0.351256
2024-12-30 23:57:00    0.345256
2024-12-30 23:58:00    0.342256
2024-12-30 23:59:00    0.346256
2024-12-31 00:00:00    0.350256
Name: rad, Length: 1563221, dtype: float64
In [48]:
print("Calculating storm surge using both methods...")
rsp = 30
surge_pytides = pytides_surge(ts[ts.columns[0]], resample=rsp)
surge_utide = utide_surge(ts[ts.columns[0]], _lat, resample=rsp)

correlation = surge_pytides.corr(surge_utide)
rmse = ((surge_pytides - surge_utide)**2).mean()**0.5

print(f"--------\n📊 Storm Surge Comparison Results:")
print(f"Correlation coefficient: {correlation:.4f}")
print(f"RMSE between methods: {rmse:.3f} meters")
Calculating storm surge using both methods...
/home/tomsail/work/gist/tide_best_practice/.venv/lib/python3.11/site-packages/pytides2/tide.py:438: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  heights = heights[sort]
--------
📊 Storm Surge Comparison Results:
Correlation coefficient: 0.9981
RMSE between methods: 0.013 meters
In [49]:
(surge_pytides.resample("1h").mean().hvplot(label="surge pytides", grid=True)
 *surge_utide.resample("1h").mean().hvplot(label="surge utide")
 ).opts(
    width=1200,
    height = 500
)
Out[49]:

Second part: chunked detiding (to be continued)¶

In [ ]:
def surge_chunked(ts: pd.Series, lat: float, resample: int = None, max_days: int = 365) -> pd.Series:
    ts0 = ts.copy()
    if resample is not None:
        ts = ts.resample(f"{resample}min").mean()
        ts = ts.shift(freq=f"{resample / 2}min")

    OPTS = {
        "constit": "auto",
        "method": "ols",
        "order_constit": "frequency",
        "Rayleigh_min": 0.97,
        "lat": lat,
        "verbose": True
    }

    detided = pd.Series(index=ts0.index, dtype='float64')

    t_start = ts.index.min()
    t_end = ts.index.max()
    chunk_start = t_start
    chunk_size = pd.Timedelta(days = max_days)

    while chunk_start < t_end:
        current_chunk_size = chunk_size

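        # grow the window until it holds roughly 90% of a year of valid data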
        while True:
            chunk_end = chunk_start + current_chunk_size
            if chunk_end > t_end:
                chunk_end = t_end

            chunk = ts[chunk_start:chunk_end]
            avail = data_availability(chunk, freq="60min")
            total_days = current_chunk_size.total_seconds()/(3600*24)
            if total_days*avail >= 365*0.9:
                print(f"Detiding chunk {chunk_start.date()} to {chunk_end.date()} ({avail*100:.1f}% available)")
                try:
                    coef = utide.solve(
                        chunk.index,
                        chunk,
                        **OPTS
                    )
                    recon_index = ts0.loc[chunk_start:chunk_end].index
                    tidal = utide.reconstruct(recon_index, coef, verbose=OPTS["verbose"])
                    detided.loc[chunk_start:chunk_end] = ts0.loc[chunk_start:chunk_end].values - tidal.h
                except Exception as e:
                    print(f"UTide failed on chunk {chunk_start} to {chunk_end}: {e}")
                break
            else:
                print(f"Data availability {avail:.1f}% from {chunk_start.date()} to {chunk_end.date()} — expanding chunk.")
                current_chunk_size += pd.Timedelta(days=6*30)
                if chunk_start + current_chunk_size > t_end:
                    print("End of time series reached with insufficient data.")
                    break

        chunk_start = chunk_end

    return detided
In [ ]:
chunked = surge_chunked(ts[ts.columns[0]], _lat, 20)
Detiding chunk 2022-01-01 to 2023-01-01 (98.8% available)
solve: matrix prep ... solution ... done.
prep/calcs ... done.
Detiding chunk 2023-01-01 to 2024-01-01 (92.0% available)
solve: matrix prep ... solution ... done.
prep/calcs ... done.
Detiding chunk 2024-01-01 to 2024-12-31 (90.4% available)
solve: matrix prep ... solution ... done.
prep/calcs ... done.
In [ ]:
(surge_pytides.resample("1h").mean().hvplot(
    label="sugre pytides", grid=True
 )*surge_utide.resample("1h").mean().hvplot(
    label="surge utide"
 )*chunked.resample("1h").mean().hvplot(
    label="chunked utide"
 )).opts(
    width=1200,
    height = 500
)
Out[ ]: