Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions .github/workflows/full_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,28 @@
branches: [main]

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: astral-sh/ruff-action@v2
with:
version: 0.6.2
src: src

build:

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {contents: read}
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.14"]
pandas-version: ["pandas2", "pandas3"] # TODO: drop pandas2 once 3.x is well-established
exclude:
- python-version: "3.10"
pandas-version: "pandas3"

steps:
- uses: actions/checkout@v4

- uses: astral-sh/ruff-action@v2
with:
version: 0.6.2
src: src

- uses: extractions/setup-just@v3

- name: Set up uv
Expand All @@ -32,6 +40,17 @@
- name: Install dependencies
run: uv sync --group test --group networks --no-dev

- name: Install pandas 2.x
if: matrix.pandas-version == 'pandas2'
run: uv run pip install "pandas>=2.0,<3.0"

- name: Install pandas 3.x
if: matrix.pandas-version == 'pandas3'
run: uv run pip install "pandas>=3.0,<4.0"

- name: Show pandas version
run: uv run python -c "import pandas; print(f'pandas {pandas.__version__}')"

- name: Type check
run: just typecheck

Expand Down
16 changes: 14 additions & 2 deletions src/modelskill/comparison/_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ def _parse_dataset(data: xr.Dataset) -> xr.Dataset:
# matched_data = self._matched_data_to_xarray(matched_data)
assert "Observation" in data.data_vars

# Normalize datetime precision to avoid xarray interp issues with pandas 3.0
# Different data sources may have different precisions (datetime64[s], datetime64[us], etc.)
# Use nanoseconds (ns) for backward compatibility with pandas 2.x
# Note: The dtype.kind == "M" check is required because some datasets use
# non-datetime indexes (e.g., RangeIndex in tests). Only DatetimeIndex has
# the .as_unit() method, so we must skip normalization for other index types.
if data.time.dtype.kind == "M": # M = datetime64
time_pd = data.time.to_index() # Preserves freq attribute
if time_pd.dtype != "datetime64[ns]":
time_index = time_pd.as_unit("ns")
data = data.assign_coords(time=time_index)

# no missing values allowed in Observation
if data["Observation"].isnull().any():
raise ValueError("Observation data must not contain missing values.")
Expand Down Expand Up @@ -345,12 +357,12 @@ def _matched_data_to_xarray(
)

# check that items.obs and items.model are numeric
if not np.issubdtype(df[items.obs].dtype, np.number):
if not pd.api.types.is_numeric_dtype(df[items.obs].dtype):
raise ValueError(
"Observation data is of type {df[items.obs].dtype}, it must be numeric"
)
for m in items.model:
if not np.issubdtype(df[m].dtype, np.number):
if not pd.api.types.is_numeric_dtype(df[m].dtype):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about importing `is_numeric_dtype` directly at module level? It would make these lines cleaner.

raise ValueError(
f"Model data: {m} is of type {df[m].dtype}, it must be numeric"
)
Expand Down
4 changes: 2 additions & 2 deletions src/modelskill/comparison/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def _add_spatial_grid_to_df(
bins_y = np.arange(y_start, y_end + binsize / 2, binsize)
# cut and get bin centre
df["xBin"] = pd.cut(df.x, bins=bins_x)
df["xBin"] = df["xBin"].apply(lambda x: x.mid)
df["xBin"] = df["xBin"].apply(lambda x: x.mid if pd.notna(x) else x)
df["yBin"] = pd.cut(df.y, bins=bins_y)
df["yBin"] = df["yBin"].apply(lambda x: x.mid)
df["yBin"] = df["yBin"].apply(lambda x: x.mid if pd.notna(x) else x)

return df

Expand Down
8 changes: 5 additions & 3 deletions src/modelskill/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,9 +587,11 @@ def peak_ratio(
time = obs.index

# Calculate number of years
dt_int = (time[1:].values - time[0:-1].values).view("int64")
dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9 # in seconds
N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)
# Use total_seconds() to handle any datetime precision (ns, us, ms, s)
dt = time[1:] - time[:-1]
dt_seconds = dt.total_seconds().values
dt_mode_seconds = float(stats.mode(dt_seconds, keepdims=False)[0])
N_years = dt_mode_seconds / 24 / 3600 / 365.25 * len(time)
peak_index, AAP_ = _partial_duration_series(
time,
obs,
Expand Down
2 changes: 1 addition & 1 deletion src/modelskill/model/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class DummyModelResult:
--------
>>> import pandas as pd
>>> import modelskill as ms
>>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="H", periods=2))
>>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="h", periods=2))
>>> obs = ms.PointObservation(df, name="foo")
>>> mr = ms.DummyModelResult(strategy='mean')
>>> pmr = mr.extract(obs)
Expand Down
11 changes: 11 additions & 0 deletions src/modelskill/timeseries/_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,17 @@ def _convert_to_dataset(
data = data.rename({time_dim_name: "time"})
ds = data

# Normalize datetime precision to avoid xarray interp issues with pandas 3.0
# Different data sources (dfs0 files, DataFrames) may have different precisions
# (datetime64[s], datetime64[us], datetime64[ns], etc.), and xarray.interp()
# fails when interpolating between datasets with mismatched precisions.
# Use nanoseconds (ns) for backward compatibility with pandas 2.x
if ds.time.dtype.kind == "M": # M = datetime
time_pd = ds.time.to_index() # Preserves freq attribute
if time_pd.dtype != "datetime64[ns]":
time_index = time_pd.as_unit("ns")
ds = ds.assign_coords(time=time_index)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Part of me wants to wrap this in a function, even though it is only used twice: here and in `_comparison.py`.


name = _validate_data_var_name(varname)

n_unique_times = len(ds.time.to_index().unique())
Expand Down
31 changes: 25 additions & 6 deletions tests/test_comparercollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,12 +570,31 @@ def test_plot_accepts_figsize(cc_plot_function):
assert a, b == figsize


def test_peak_ratio(cc):
"""Non existent peak ratio"""
cc = cc.sel(model="m1")
sk = cc.skill(metrics=["peak_ratio"])

assert sk.loc["fake point obs", "peak_ratio"] == pytest.approx(1.119999999)
def test_peak_ratio():
"""Test peak_ratio with synthetic data containing clear, verifiable peaks"""
# Create data with 2 clear peaks:
# Peak 1: obs=5.0, model=5.5 → ratio=1.1
# Peak 2: obs=6.0, model=6.6 → ratio=1.1
# Expected peak_ratio = mean([1.1, 1.1]) = 1.1
times = pd.date_range("2020-01-01", periods=100, freq="h")
obs_vals = np.zeros(100)
mod_vals = np.zeros(100)

# Create peak 1 around index 10
obs_vals[8:13] = [0, 1, 5, 1, 0]
mod_vals[8:13] = [0, 1.1, 5.5, 1.1, 0]

# Create peak 2 around index 50
obs_vals[48:53] = [0, 1, 6, 1, 0]
mod_vals[48:53] = [0, 1.1, 6.6, 1.1, 0]

df = pd.DataFrame({"Observation": obs_vals, "model": mod_vals}, index=times)

cmp = ms.from_matched(df, obs_item=0, name="synthetic_peaks")
sk = cmp.skill(metrics=["peak_ratio"])

# Model peaks are 1.1x observation peaks
assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(1.1, abs=0.01)


def test_peak_ratio_2(cc_pr):
Expand Down
4 changes: 2 additions & 2 deletions tests/test_simple_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,12 @@ def test_matching_pointobservation_with_trackmodelresult_is_not_possible():
# ignore the data
tdf = pd.DataFrame(
{"x": [1, 2], "y": [1, 2], "m1": [0, 0]},
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
)
mr = ms.TrackModelResult(tdf, item="m1", x_item="x", y_item="y")
pdf = pd.DataFrame(
data={"level": [0.0, 0.0]},
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
)
obs = ms.PointObservation(pdf, item="level")
with pytest.raises(TypeError, match="TrackModelResult"):
Expand Down
Loading