Tidal Analysis Comparison: UTide vs PyTides¶
In this notebook we'll look at ways of separating:
- the storm surge (the meteorologically-driven component)
- from the astronomical tide (the predictable gravitational component).
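In other words, we assume the observed signal decomposes additively, and the surge is whatever remains once the harmonic tide has been removed:

$$ \eta_{obs}(t) = \eta_{tide}(t) + \eta_{surge}(t) $$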
Study Location¶
We've chosen Roscoff, France as our test case: it has some of the largest tidal ranges in Europe (up to 9 meters).
The data comes from the IOC database (station rosc), extracted using searvey.
The data and tools compared:¶
We'll evaluate the following libraries for the (de)tide analysis:
- utide
- pytides (via the pytides2 package)
- pyfes (for the FES2022 model)
0. Setting up functions, tools and data¶
First, let's import the libraries we'll need. Each serves a specific purpose in our tidal detective work:
import glob
import hvplot.pandas
import hvplot.xarray
import numpy as np
import os
import pandas as pd
import pyfes
import searvey
import utide
import xarray as xr
from pytides2.tide import Tide
import ioc_cleanup as C
We define FES_CONSTITUENTS
- these are the tidal components included in the FES 2022 model, representing the most important tidal harmonics globally.
We just reordered the constituents according to their frequencies and importance.
Global variables¶
UTIDE_OPTS = {
"constit": "auto",
"method": "ols",
"order_constit": "frequency",
"Rayleigh_min": 0.97, # High threshold for constituent resolution
"verbose": False
}
FES_CONSTITUENTS = [
"M2", "S2", "N2", "K2", "2N2", "L2", "T2", "R2", "NU2", "MU2", "EPS2", "LAMBDA2", # Semi-diurnal (twice daily)
"K1", "O1", "P1", "Q1", "J1", "S1", # Diurnal (once daily)
"MF", "MM", "MSF", "SA", "SSA", "MSQM", "MTM", # Long period (fortnightly to annual)
"M4", "MS4", "M6", "MN4", "N4", "S4", "M8", "M3", "MKS2" # Short period (higher harmonics)
]
DATA_FOLDER = "data"
# for FES time series reconstruction
START = np.datetime64('2022-01-01T00:00:00')
END = np.datetime64("2022-04-01T00:00:00")
STEP = np.timedelta64(20, "m")
DATES = np.arange(START, END+STEP, STEP)
FES_M2 = xr.open_dataset("/home/tomsail/work/FES/load_tide/m2_fes2022.nc")
functions¶
def data_availability(series: pd.Series, freq="60min") -> float:
resampled = series.resample(freq).mean()
data_avail_ratio = 1 - resampled.isna().sum() / len(resampled)
return float(data_avail_ratio)
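For example (synthetic data), a day of 10-minute samples with a 6-hour gap yields 75% availability at an hourly resampling:

# synthetic example: 1 day of 10-minute data with a 6-hour gap
idx = pd.date_range("2022-01-01", periods=144, freq="10min")
s = pd.Series(1.0, index=idx)
s.loc["2022-01-01 06:00":"2022-01-01 11:50"] = np.nan  # knock out 6 hours
print(f"{data_availability(s, freq='60min'):.1%}")  # 75.0%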
Utide wrappers¶
def utide_get_coefs(ts: pd.Series, lat: float, resample: int = None) -> utide.utilities.Bunch:
    opts = {**UTIDE_OPTS, "lat": lat}  # copy, so the global options are not mutated
    if resample is not None:
        ts = ts.resample(f"{resample}min").mean()
        ts = ts.shift(freq=f"{resample / 2}min")  # Center the resampled points
    return utide.solve(ts.index, ts, **opts)
def utide_surge(ts: pd.Series, lat: float, resample: int = None)-> pd.Series:
ts0 = ts.copy()
coef = utide_get_coefs(ts, lat, resample)
tidal = utide.reconstruct(ts0.index, coef, verbose = UTIDE_OPTS["verbose"])
return pd.Series(data=ts0.values - tidal.h, index = ts0.index)
def utide_to_df(utide_coef: utide.utilities.Bunch) -> pd.DataFrame:
return pd.DataFrame({
"amplitude": utide_coef["A"],
"phase": utide_coef["g"],
"amplitude_CI": utide_coef["A_ci"],
"phase_CI": utide_coef["g_ci"]
}, index=utide_coef["name"])
pytides wrappers¶
def pytide_get_coefs(ts: pd.Series, resample: int = None) -> Tide:
    if resample is not None:
        ts = ts.resample(f"{resample}min").mean()
        ts = ts.shift(freq=f"{resample / 2}min")  # Center the resampled points
    ts = ts.dropna()
    return Tide.decompose(ts, ts.index.to_pydatetime())[0]
def pytides_surge(ts: pd.Series, resample: int = None)-> pd.Series:
ts0 = ts.copy()
tide = pytide_get_coefs(ts, resample)
t0 = ts.index.to_pydatetime()[0]
hours = (ts.index - ts.index[0]).total_seconds()/3600
times = Tide._times(t0, hours)
return pd.Series(ts0.values - tide.at(times), index=ts0.index)
def pytides_to_df(pytides_tide: Tide)-> pd.DataFrame:
constituent_names = [c.name.upper() for c in pytides_tide.model['constituent']]
return pd.DataFrame(pytides_tide.model, index=constituent_names).drop('constituent', axis=1)
FES / pyfes wrappers¶
def load_fes_yaml(path):
if os.path.exists(path):
return path
else:
raise ValueError(f"FES Yaml not found in {path}")
def get_pyfes_cfg(yaml_file):
cfg = pyfes.load_config(load_fes_yaml(yaml_file))
return cfg
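As a sanity check (a minimal sketch, assuming a valid fes2022.yaml pointing at the FES2022 netCDF files), the loaded config behaves like a mapping whose 'tide' and 'radial' entries are the ones used throughout this notebook:

cfg = get_pyfes_cfg("fes2022.yaml")
print(list(cfg))  # expect at least 'tide' and 'radial'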
def reduce_coef_to_fes(df: pd.DataFrame, cnst: list, verbose: bool = False):
res = pd.DataFrame(0.0, index=cnst, columns=df.columns)
common_constituents = df.index.intersection(cnst)
res.loc[common_constituents] = df.loc[common_constituents]
not_in_fes_df = df[~df.index.isin(cnst)]
not_in_fes = not_in_fes_df.index.tolist()
not_in_fes_amps = not_in_fes_df["amplitude"].round(3).tolist()
missing_fes = set(cnst) - set(df.index)
if verbose:
print(f"Constituents found but not in FES: {not_in_fes}")
print(f"Their amplitudes: {not_in_fes_amps}")
if missing_fes:
print(f"FES constituents missing from analysis (set to 0): {sorted(missing_fes)}")
return res
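A toy example (hypothetical amplitudes) of the padding/zeroing behaviour: constituents outside the FES list are dropped, and FES constituents missing from the analysis are filled with zeros:

toy = pd.DataFrame(
    {"amplitude": [2.7, 0.03], "phase": [141.7, 12.0]},
    index=["M2", "MSN2"],  # MSN2 is not part of the FES list below
)
print(reduce_coef_to_fes(toy, ["M2", "S2"], verbose=True))
# MSN2 is reported as "found but not in FES"; S2 is returned with amplitude 0.0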
def get_constituent_name(constituent_enum):
    # pyfes enum members are named like kM2, kS2, ...: strip the leading "k"
    name = constituent_enum.name
    if name.startswith('k'):
        name = name[1:].upper()
    return name
def get_constituents_fes(cfg, lon, lat):
    tide_dict = cfg['tide'].interpolate([lon], [lat])[0]
    result = {}
    for constituent, value_array in tide_dict.items():
        value = value_array[0]
        amplitude = np.abs(value)
        phase = np.mod(np.rad2deg(np.angle(value)), 360)
        name = get_constituent_name(constituent)
        result[name] = {
            'amplitude': amplitude / 100,  # FES amplitudes are in cm: convert to meters
            'phase': phase
        }
    return pd.DataFrame(result).T
def fes_reconstruct(lon, lat, dates, cfg, num_threads=1):
lons = np.full(dates.shape, lon)
lats = np.full(dates.shape, lat)
tide, lp, qc = pyfes.evaluate_tide(cfg['tide'], dates, lons, lats, num_threads=num_threads)
load, load_lp, qc_lp = pyfes.evaluate_tide(cfg['radial'], dates, lons, lats, num_threads=num_threads)
geocentric_tide = tide + load + lp
df = pd.DataFrame(
{
"tide": tide,
"lp": lp,
"qc": qc,
"load": load,
"load_lp": load_lp,
"qc_lp": qc_lp,
"geocentric": geocentric_tide,
},
index=dates,
)
df.attrs["lon"] = lon
df.attrs["lat"] = lat
return df["geocentric"]
Comparison wrappers¶
def sim_on_obs(sim, obs):
obs = pd.Series(obs, name="obs")
sim = pd.Series(sim, name="sim")
df = pd.merge(sim, obs, left_index=True, right_index=True, how="outer")
df["sim"] = df["sim"].interpolate(method="linear", limit_direction="both")
df = df.dropna(subset=["obs"])
sim_ = df["sim"].drop_duplicates()
obs_ = df["obs"].drop_duplicates()
return sim_, obs_
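A minimal sketch (synthetic series) of what sim_on_obs does: the simulation is linearly interpolated onto the observation timestamps, and times without observations are dropped:

obs = pd.Series([0.0, 0.2, 0.4, 0.6], index=pd.date_range("2022-01-01", periods=4, freq="30min"))
sim = pd.Series([0.0, 0.3, 0.6], index=pd.date_range("2022-01-01", periods=3, freq="1h"))
sim_i, obs_i = sim_on_obs(sim, obs)
print(pd.concat({"sim": sim_i, "obs": obs_i}, axis=1))  # sim now has values at 00:30 and 01:30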
def compute_score(corr: float, rss: float) -> float:
return np.max([0, corr]) * (1 - np.min([rss, 1]))
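As a quick worked example (hypothetical values), a station with near-perfect correlation and a small constituent RSS scores close to 1:

# hypothetical inputs: corr = 0.999, rss = 0.009
print(compute_score(0.999, 0.009))  # 0.999 * (1 - 0.009) ≈ 0.990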
def concat_tides_constituents(dict_tides):
multi_df = pd.concat(dict_tides)
multi_df.index.names = ['method', 'constituent']
multi_df = multi_df.swaplevel().sort_index()
available_constituents = multi_df.index.get_level_values('constituent').unique()
filtered_order = [c for c in FES_CONSTITUENTS if c in available_constituents][::-1]
return multi_df.reindex(filtered_order, level='constituent')
def get_tidal_ts(station, ioc_df, fes_cfg, rsp = 20):
_lon = ioc_df[ioc_df.ioc_code == station].lon.values[0]
_lat = ioc_df[ioc_df.ioc_code == station].lat.values[0]
obs_file = glob.glob(f"data/{station}_*.parquet")[0]
ts = pd.read_parquet(obs_file)
ts = ts[ts.columns[0]]
# constituents
ut = utide_get_coefs(ts, _lat, resample=rsp)
utide_reduced_coef = reduce_coef_to_fes(utide_to_df(ut), FES_CONSTITUENTS)
pt = pytide_get_coefs(ts, rsp)
pytides_reduced_coef = reduce_coef_to_fes(pytides_to_df(pt), FES_CONSTITUENTS)
fes_df = get_constituents_fes(fes_cfg, _lon, _lat)
tide_ = concat_tides_constituents({
"fes":fes_df,
"utide":utide_reduced_coef,
"pytides": pytides_reduced_coef
})
# time domain
ts_fes = fes_reconstruct(lon=_lon, lat=_lat, dates=DATES, cfg=fes_cfg)
ts_fes_obs, _ = sim_on_obs(ts_fes, ts.loc[START:END])
df_obs_fes = pd.concat({
"fes": ts_fes_obs/100,
"obs": ts.loc[START: END],
}, axis=1)
return tide_, df_obs_fes
study site
station = "rosc"
sensor = "rad"
get 25 years of data
raw = searvey.fetch_ioc_station(
station,
"2000-01-01",
pd.Timestamp.now()
)
raw.describe()
| | rad |
|---|---|
| count | 7.679029e+06 |
| mean | 4.699290e+00 |
| std | 8.233551e+00 |
| min | -9.999990e+01 |
| 25% | 3.528500e+00 |
| 50% | 5.369500e+00 |
| 75% | 7.094600e+00 |
| max | 9.985000e+00 |
Station Metadata
# Get station metadata
ioc_df = searvey.get_ioc_stations()
_lat = ioc_df[ioc_df.ioc_code == station].lat.values[0]
station_info = ioc_df[ioc_df.ioc_code == station]
print(f"Station: {station_info['location'].values[0]}")
print(f"Latitude: {_lat:.4f}°N")
print(f"Longitude: {station_info['lon'].values[0]:.4f}°E")
print(f"Country: {station_info['country'].values[0]}")
Station: Roscoff
Latitude: 48.7190°N
Longitude: -3.9660°E
Country: France
Let's clean the data using ioc_cleanup:
!mkdir -p transformations
import requests
response = requests.get(f'https://raw.githubusercontent.com/seareport/ioc_cleanup/refs/heads/ioc_2024/transformations/{station}_{sensor}.json')
with open(f'transformations/{station}_{sensor}.json', 'wb') as f:
f.write(response.content)
34930
Here's a snapshot of the cleaning transformation file:
! head -20 "transformations/{station}_{sensor}.json"
{ "ioc_code": "rosc", "sensor": "rad", "notes": "", "skip": false, "wip": false, "start": "2022-01-01T00:00:00", "end": "2025-01-01T00:00:00", "high": null, "low": null, "dropped_date_ranges": [ ["2022-03-27T03:00:00", "2022-03-27T03:59:00"], ["2022-12-02T13:35:00", "2022-12-02T13:39:00"], ["2023-03-26T03:00:00", "2023-03-26T03:59:00"] ], "dropped_timestamps": [ "2022-09-01T01:57:00", "2023-05-01T09:03:00", "2023-05-01T09:04:00", "2023-05-01T09:05:00",
Now let's apply these transformations to clean our data:
# Load and apply quality control transformations
trans = C.load_transformation(station, sensor)
cleaned_data = C.transform(raw, trans)
ts = cleaned_data[sensor]
print(f"Data cleaning complete!")
print(f"Original data points: {len(raw)}")
print(f"Cleaned data points: {len(ts)}")
print(f"Data availability: {data_availability(ts):.1%}")
print(f"Time range raw: {raw.index.min()} to {raw.index.max()}")
print(f"Time range clean: {ts.index.min()} to {ts.index.max()}")
Data cleaning complete!
Original data points: 7679029
Cleaned data points: 1548102
Data availability: 99.6%
Time range raw: 2009-09-17 23:56:24 to 2025-07-17 14:35:00
Time range clean: 2022-01-01 00:01:00 to 2025-01-01 00:00:00
1: UTide Analysis¶
out = utide_get_coefs(ts, _lat, resample=20)
print(f"Found {len(out['name'])} tidal constituents")
Found 68 tidal constituents
Let's organize the UTide results into a clean DataFrame:
print("Top 20 tidal constituents by amplitude (UTide):")
print(utide_to_df(out).sort_values('amplitude', ascending=False).head(20))
Top 20 tidal constituents by amplitude (UTide):

         amplitude       phase  amplitude_CI   phase_CI
M2        2.698727  141.712703      0.003081   0.065412
S2        1.015954  187.427755      0.003081   0.173769
N2        0.533240  123.751469      0.003081   0.331070
K2        0.291735  184.513174      0.003081   0.605060
MU2       0.141442  150.748488      0.003081   1.248147
M4        0.119954  140.948298      0.001153   0.550953
NU2       0.100100  111.967316      0.003081   1.763576
L2        0.096260  138.833846      0.003081   1.833901
2N2       0.088861  113.525695      0.003081   1.986781
K1        0.081898   81.700949      0.001277   0.894096
MS4       0.081814  191.957568      0.001153   0.807797
O1        0.074173  333.543720      0.001278   0.986714
SA        0.060349  304.358930      0.016419  15.386364
T2        0.054464  175.609556      0.003081   3.241401
MN4       0.047274  113.279430      0.001153   1.397993
LDA2      0.046476  110.731276      0.003081   3.798172
SSA       0.046149   79.602077      0.016350  20.206857
EPS2      0.035734  129.866658      0.003081   4.940098
MSN2      0.029038  332.005902      0.003081   6.079230
P1        0.028376   72.451468      0.001278   2.579763
2: PyTides Analysis¶
out_pytides = pytide_get_coefs(ts, 20)
/home/tomsail/work/gist/tide_best_practice/.venv/lib/python3.11/site-packages/pytides2/tide.py:438: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` heights = heights[sort]
print(f"Found {len(out_pytides.model['constituent'])} tidal constituents")
Found 38 tidal constituents
Let's organize the PyTides results:
print("Top 20 tidal constituents by amplitude (PyTides):")
print(pytides_to_df(out_pytides).sort_values('amplitude', ascending=False).head(20))
Top 20 tidal constituents by amplitude (PyTides):

         amplitude       phase
Z0        5.366716    0.000000
M2        2.696960  141.810166
S2        1.018143  187.379524
N2        0.533421  123.834273
K2        0.292225  184.647714
MU2       0.143417  148.931139
M4        0.119931  141.134845
NU2       0.099544  112.237101
L2        0.096016  138.895029
2N2       0.083984  106.947072
K1        0.081825   81.773947
MS4       0.081581  192.032615
O1        0.074254  333.333368
SA        0.068742  222.775933
T2        0.054274  175.715530
SSA       0.050564   82.881197
MN4       0.047301  113.398872
LAMBDA2   0.046093  111.441733
2SM2      0.035839    5.226947
P1        0.028065   72.881608
3: FES¶
fes_cfg = get_pyfes_cfg("fes2022.yaml")
fes_df = get_constituents_fes(fes_cfg, station_info['lon'].values[0], station_info['lat'].values[0])
4: Comparison¶
To fairly compare UTide and pytides results, we'll standardize them against the FES 2022 constituent list. This will show us:
- Which constituents each method found
- Which constituents are missing from each analysis
- How the amplitudes compare for common constituents
pytides_reduced_coef = reduce_coef_to_fes(pytides_to_df(out_pytides), FES_CONSTITUENTS)
pytides_reduced_coef.head(10)
utide_reduced_coef = reduce_coef_to_fes(utide_to_df(out), FES_CONSTITUENTS)
utide_reduced_coef.head(10)
fes_df.head(10)
| | amplitude | phase |
|---|---|---|
| M2 | 2.696960 | 141.810166 |
| S2 | 1.018143 | 187.379524 |
| N2 | 0.533421 | 123.834273 |
| K2 | 0.292225 | 184.647714 |
| 2N2 | 0.083984 | 106.947072 |
| L2 | 0.096016 | 138.895029 |
| T2 | 0.054274 | 175.715530 |
| R2 | 0.007773 | 221.185632 |
| NU2 | 0.099544 | 112.237101 |
| MU2 | 0.143417 | 148.931139 |
| | amplitude | phase | amplitude_CI | phase_CI |
|---|---|---|---|---|
| M2 | 2.698727 | 141.712703 | 0.003081 | 0.065412 |
| S2 | 1.015954 | 187.427755 | 0.003081 | 0.173769 |
| N2 | 0.533240 | 123.751469 | 0.003081 | 0.331070 |
| K2 | 0.291735 | 184.513174 | 0.003081 | 0.605060 |
| 2N2 | 0.088861 | 113.525695 | 0.003081 | 1.986781 |
| L2 | 0.096260 | 138.833846 | 0.003081 | 1.833901 |
| T2 | 0.054464 | 175.609556 | 0.003081 | 3.241401 |
| R2 | 0.006671 | 223.102025 | 0.003081 | 26.463453 |
| NU2 | 0.100100 | 111.967316 | 0.003081 | 1.763576 |
| MU2 | 0.141442 | 150.748488 | 0.003081 | 1.248147 |
| | amplitude | phase |
|---|---|---|
| MM | 0.008214 | 192.569726 |
| MF | 0.010752 | 189.900280 |
| MTM | 0.001946 | 183.984763 |
| MSQM | 0.000064 | 211.482854 |
| Q1 | 0.021393 | 285.725126 |
| O1 | 0.073378 | 331.900683 |
| P1 | 0.026480 | 71.327258 |
| S1 | 0.005409 | 54.803715 |
| K1 | 0.081319 | 81.104705 |
| J1 | 0.003529 | 133.804127 |
visual comparison¶
What to look for:
- Major constituents (M2, S2, N2, K1, O1) should have similar amplitudes
- Minor constituents may show more variation between methods
- Missing constituents appear as zero amplitude in one method but not the other
tide_ = concat_tides_constituents({
"fes":fes_df,
"utide":utide_reduced_coef,
"pytides": pytides_reduced_coef
})
tide_
| constituent | method | amplitude | phase | amplitude_CI | phase_CI |
|---|---|---|---|---|---|
| MKS2 | fes | 0.015689 | 259.114289 | NaN | NaN |
| | pytides | 0.000000 | 0.000000 | NaN | NaN |
| | utide | 0.013785 | 234.094592 | 0.003081 | 12.806398 |
| M3 | fes | 0.002879 | 239.493529 | NaN | NaN |
| | pytides | 0.014143 | 86.524063 | NaN | NaN |
| ... | ... | ... | ... | ... | ... |
| S2 | pytides | 1.018143 | 187.379524 | NaN | NaN |
| | utide | 1.015954 | 187.427755 | 0.003081 | 0.173769 |
| M2 | fes | 2.699484 | 140.653826 | NaN | NaN |
| | pytides | 2.696960 | 141.810166 | NaN | NaN |
| | utide | 2.698727 | 141.712703 | 0.003081 | 0.065412 |
102 rows × 4 columns
def plot_comparative_amplitudes(df, station):
return df.amplitude.hvplot.barh(
ylabel="Tidal Constituent",
xlabel="Amplitude (meters)",
by="method",
grid=True,
title=f"Tidal Amplitudes: UTide vs PyTide, station {station}",
legend='top_right',
rot=90
).opts(
height=1000,
width=1000,
fontsize={'title': 15, 'labels': 12, 'xticks': 8, 'yticks': 8}
)
plot_comparative_amplitudes(tide_, station)
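Before collapsing the comparison into a single number, it can help to rank the constituents by how much the three methods disagree on amplitude. A small sketch over the tide_ table built above (spread is a hypothetical helper column):

amps = tide_["amplitude"].unstack(level="method")
amps["spread"] = amps.max(axis=1) - amps.min(axis=1)  # max disagreement per constituent
print(amps.sort_values("spread", ascending=False).head(10))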
Quantitative comparison¶
We'll assess the RSS over all the constituents, taking FES as the reference.
RSS is given by:
$$ \operatorname{RSS} = \sum_{i=1}^{n} \left(A_{method,i} - A_{FES,i}\right)^2 $$
where $A_{method,i}$ is the amplitude of constituent $i$ estimated by UTide or pytides, and $A_{FES,i}$ the corresponding FES 2022 amplitude.
def compute_rss(df:pd.DataFrame, param:str, a:str, b:str):
df_ = df[param].unstack(level='method')
df_["rss"] = (df_[a] - df_[b])**2
return df_["rss"].sum()
print(f"utide rss for {station} is {compute_rss(tide_, 'amplitude', 'utide', 'fes'):.3f}")
print(f"pytides rss for {station} is {compute_rss(tide_, 'amplitude', 'pytides', 'fes'):.3f}")
utide rss for rosc is 0.009
pytides rss for rosc is 0.009
We'll iterate through an existing folder containing cleaned data at tide gauge locations.
res = {}
for path in glob.glob("data/*parquet"):
ts = pd.read_parquet(path)
ts = ts[ts.columns[0]]
root, file_ext = os.path.split(path)
file, ext = os.path.splitext(file_ext)
station, sensor = file.split("_")
_lon = ioc_df[ioc_df.ioc_code == station].lon.values[0]
_lat = ioc_df[ioc_df.ioc_code == station].lat.values[0]
try:
# constituents
ut = utide_get_coefs(ts, _lat, resample=20)
utide_reduced_coef = reduce_coef_to_fes(utide_to_df(ut), FES_CONSTITUENTS)
pt = pytide_get_coefs(ts, 20.)
pytides_reduced_coef = reduce_coef_to_fes(pytides_to_df(pt), FES_CONSTITUENTS)
fes_df = get_constituents_fes(fes_cfg, _lon, _lat)
tide_ = concat_tides_constituents({
"fes":fes_df,
"utide":utide_reduced_coef,
"pytides": pytides_reduced_coef
})
rss_utide = compute_rss(tide_, "amplitude", "utide", "fes")
rss_pytides = compute_rss(tide_, "amplitude", "pytides", "fes")
# time domain
ts_fes = fes_reconstruct(lon=_lon, lat=_lat, dates=DATES, cfg=fes_cfg)
ts_fes_obs, _ = sim_on_obs(ts_fes, ts.loc[START:END])
df_sim_obs_fes = pd.concat({
"fes": ts_fes_obs/100,
"obs": ts.loc[START: END],
}, axis=1)
corr_matrix = df_sim_obs_fes.corr(method="pearson")
corr_fes = corr_matrix.loc["fes", "obs"]
score_utide = compute_score(corr_fes, float(rss_utide))
score_pytides = compute_score(corr_fes, float(rss_pytides))
# results for station
res[station] = {
"ioc_code": station,
"lat": _lat,
"lon": _lon,
"rss_utide": rss_utide,
"rss_pytides": rss_pytides,
"corr_fes": corr_fes,
"score_utide": score_utide,
"score_pytides": score_pytides
}
# print(f"rss for {station} is {rss:.4f}")
except Exception as e:
print(f"couldn't process {station}: {e}")
rss_df = pd.DataFrame(res).T
rss_df.score_utide = rss_df.score_utide.astype(float)
rss_df.score_pytides = rss_df.score_pytides.astype(float)
rss_df.rss_utide = rss_df.rss_utide.astype(float)
rss_df.rss_pytides = rss_df.rss_pytides.astype(float)
rss_df["corr_fes"] = rss_df["corr_fes"].astype(float)
rss_df
| | ioc_code | lat | lon | rss_utide | rss_pytides | corr_fes | score_utide | score_pytides |
|---|---|---|---|---|---|---|---|---|
| ferg | ferg | -19.277 | 147.058 | 0.011815 | 0.011191 | 0.996797 | 0.985020 | 0.985642 |
| lame2 | lame2 | 18.318 | -64.724 | 0.003215 | 0.003062 | 0.987133 | 0.983960 | 0.984110 |
| nshi | nshi | 55.01 | -1.44 | 0.008610 | 0.009202 | 0.998569 | 0.989972 | 0.989380 |
| kawa | kawa | 20.036 | -155.832 | 0.001984 | 0.003518 | 0.982265 | 0.980316 | 0.978809 |
| stqy2 | stqy2 | 48.648 | -2.822 | 0.012862 | 0.011067 | 0.998938 | 0.986090 | 0.987883 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| bang | bang | 54.66 | -5.67 | 0.014733 | 0.016029 | 0.995949 | 0.981275 | 0.979985 |
| ilfa | ilfa | 51.21 | -4.11 | 0.013892 | 0.012549 | 0.998642 | 0.984769 | 0.986110 |
| trst | trst | -10.586 | 142.222 | 0.016674 | 0.017837 | 0.987406 | 0.970942 | 0.969793 |
| cwfl | cwfl | 27.978 | -82.832 | 0.010662 | 0.010547 | 0.992807 | 0.982222 | 0.982336 |
| fpnt | fpnt | 37.807 | -122.465 | 0.003382 | 0.004311 | 0.996685 | 0.993314 | 0.992389 |
377 rows × 8 columns
Visualising the RSS between UTide/pytides and FES¶
rss_df.hvplot.points(
x="lon",
y="lat",
c="rss_utide",
hover_cols = ['ioc_code'],
s=100,
geo = True,
tiles = True,
clim=(0,0.2)
).opts(width = 1000, height = 800, title = "RSS difference between UTide and FES constituents")
rss_df.hvplot.points(
x="lon",
y="lat",
c="rss_pytides",
hover_cols = ['ioc_code'],
s=100,
geo = True,
tiles = True,
clim=(0,0.2)
).opts(width = 1000, height = 800, title = "RSS difference between pytides and FES constituents")
The RSS between the FES and observed constituents looks very similar globally for the two methods.
Let's look at the 2 worst stations: anch2 and live.
The rest of the stations do quite well, with less than 0.2 m² RSS.
station= "anch2"
tide_, ts_fes_obs = get_tidal_ts(station, ioc_df, fes_cfg)
plot_comparative_amplitudes(tide_, station) + ts_fes_obs.resample("20min").mean().shift(freq="10min").hvplot().opts(height=800)
Anchorage, Alaska, sits deep in a fjord.
The tidal wave does not seem to be attenuated enough in the FES2022 model;
here is a representation of the M2 constituent in Alaska:
M2 = xr.open_dataset("/home/tomsail/work/FES/load_tide/m2_fes2022.nc")
M2_subset = M2.sel(lon=slice(205, 215),lat=slice(58,62))
(M2_subset.amplitude.hvplot.image(
geo=True,
alpha=0.9,
tiles=True,
cmap='rainbow4',
clim = (0,3)
)*M2_subset.phase.hvplot.contour(
geo=True,
)*ioc_df[ioc_df.ioc_code == station].hvplot.points(
x="lon",
y="lat",
geo=True,
color = "r",
line_color='k',
s=100,
hover_cols="ioc_code"
)).opts(width=1200, height=800)
station= "live"
tide_, ts_fes_obs = get_tidal_ts(station, ioc_df, fes_cfg)
plot_comparative_amplitudes(tide_, station) + ts_fes_obs.resample("20min").mean().shift(freq="10min").hvplot().opts(height=800)
Here, on the contrary, the tidal wave seems too attenuated:
M2_subset = M2.sel(lon=slice(350, 360),lat=slice(52,56))
(M2_subset.amplitude.hvplot.image(
geo=True,
alpha=0.7,
tiles=True,
cmap='rainbow4',
clim = (0,3)
)*M2_subset.phase.hvplot.contour(
geo=True,
)*ioc_df[ioc_df.ioc_code == station].hvplot.points(
x="lon",
y="lat",
geo=True,
color = "r",
line_color='k',
s=100,
hover_cols="ioc_code"
)).opts(width=1200, height=800)
Visualisation of the correlation¶
rss_df.hvplot.points(
x="lon",
y="lat",
c="corr_fes",
hover_cols = ['ioc_code'],
s=100,
cmap="rainbow4_r",
geo = True,
tiles = True,
).opts(width = 1000, height = 800, title = "correlation between obs and FES reconstructed signal")
Globally, the correlation between FES and the tidal-only signal from the tide gauges is very good.
Let's have a look at what is going on in the Baltic.
station= "furu"
tide_, ts_fes_obs = get_tidal_ts(station, ioc_df, fes_cfg)
plot_comparative_amplitudes(tide_, station) + ts_fes_obs.resample("20min").mean().shift(freq="10min").hvplot().opts(height=800)
The biggest difference is in the SA/SSA constituents, which have periods of 12 and 6 months respectively.
A preliminary conclusion is that FES is not able to account for these long-period constituents in its T-UGO model.
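We can confirm this directly from the constituent table returned by get_tidal_ts for this station (a small sketch):

# long-period amplitudes fitted by each method for this station
print(tide_.loc[["SA", "SSA"], "amplitude"].unstack(level="method"))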
comparison between tidal residuals¶
If both methods work correctly, the tidal residuals (i.e. the time series of the meteorological component) should be very similar.
Significant differences would indicate problems with utide, pytides, or both approaches.
# we need to take a "total water level" observed signal
file = glob.glob(f"twl/{station}*.parquet")[0]  # take the first match
ts = pd.read_parquet(file)
time
2022-01-01 00:01:00   -0.125744
2022-01-01 00:02:00   -0.119744
2022-01-01 00:03:00   -0.115744
2022-01-01 00:04:00   -0.119744
2022-01-01 00:05:00   -0.123744
                         ...
2024-12-30 23:56:00    0.351256
2024-12-30 23:57:00    0.345256
2024-12-30 23:58:00    0.342256
2024-12-30 23:59:00    0.346256
2024-12-31 00:00:00    0.350256
Name: rad, Length: 1563221, dtype: float64
print("Calculating storm surge using both methods...")
rsp = 30
surge_pytides = pytides_surge(ts[ts.columns[0]], resample=rsp)
surge_utide = utide_surge(ts[ts.columns[0]], _lat, resample=rsp)
correlation = surge_pytides.corr(surge_utide)
rmse = ((surge_pytides - surge_utide)**2).mean()**0.5
print(f"--------\n📊 Storm Surge Comparison Results:")
print(f"Correlation coefficient: {correlation:.4f}")
print(f"RMSE between methods: {rmse:.3f} meters")
Calculating storm surge using both methods...
--------
📊 Storm Surge Comparison Results:
Correlation coefficient: 0.9981
RMSE between methods: 0.013 meters
(surge_pytides.resample("1h").mean().hvplot(label="surge pytides", grid=True)
*surge_utide.resample("1h").mean().hvplot(label="surge utide")
).opts(
width=1200,
height = 500
)
Second part: chunked detiding (to be continued)¶
def surge_chunked(ts: pd.Series, lat: float, resample: int = None, max_days: int = 365) -> pd.Series:
    ts0 = ts.copy()
    if resample is not None:
        ts = ts.resample(f"{resample}min").mean()
        ts = ts.shift(freq=f"{resample / 2}min")  # Center the resampled points
    opts = {**UTIDE_OPTS, "lat": lat, "verbose": True}  # reuse the global options
    detided = pd.Series(index=ts0.index, dtype='float64')
    t_start = ts.index.min()
    t_end = ts.index.max()
    chunk_start = t_start
    chunk_size = pd.Timedelta(days=max_days)
    while chunk_start < t_end:
        current_chunk_size = chunk_size
        while True:
            chunk_end = chunk_start + current_chunk_size
            if chunk_end > t_end:
                chunk_end = t_end
            chunk = ts[chunk_start:chunk_end]
            avail = data_availability(chunk, freq="60min")
            total_days = current_chunk_size.total_seconds() / (3600 * 24)
            # require the equivalent of ~90% of a year of valid data,
            # so that the annual constituents can still be resolved
            if total_days * avail >= 365 * 0.9:
                print(f"Detiding chunk {chunk_start.date()} to {chunk_end.date()} ({avail*100:.1f}% available)")
                try:
                    coef = utide.solve(
                        chunk.index,
                        chunk,
                        **opts
                    )
                    recon_index = ts0.loc[chunk_start:chunk_end].index
                    tidal = utide.reconstruct(recon_index, coef, verbose=opts["verbose"])
                    detided.loc[chunk_start:chunk_end] = ts0.loc[chunk_start:chunk_end].values - tidal.h
                except Exception as e:
                    print(f"UTide failed on chunk {chunk_start} to {chunk_end}: {e}")
                break
            else:
                # not enough valid data in this chunk: grow it by ~6 months and retry
                print(f"Data availability {avail*100:.1f}% from {chunk_start.date()} to {chunk_end.date()} — expanding chunk.")
                current_chunk_size += pd.Timedelta(days=6*30)
                if chunk_start + current_chunk_size > t_end:
                    print("End of time series reached with insufficient data.")
                    break
        chunk_start = chunk_end
    return detided
chunked = surge_chunked(ts, _lat, 20)
Detiding chunk 2022-01-01 to 2023-01-01 (98.8% available)
solve: matrix prep ... solution ... done.
prep/calcs ... done.
Detiding chunk 2023-01-01 to 2024-01-01 (92.0% available)
solve: matrix prep ... solution ... done.
prep/calcs ... done.
Detiding chunk 2024-01-01 to 2024-12-31 (90.4% available)
solve: matrix prep ... solution ... done.
prep/calcs ... done.
(surge_pytides.resample("1h").mean().hvplot(
    label="surge pytides", grid=True
)*surge_utide.resample("1h").mean().hvplot(
label="surge utide"
)*chunked.resample("1h").mean().hvplot(
label="chunked utide"
)).opts(
width=1200,
height = 500
)