From f56ccfd4fc82fcdb6e96da1ed6fa899ddce29287 Mon Sep 17 00:00:00 2001
From: Pete Steward <p.steward@cgiar.org>
Date: Wed, 24 Jun 2026 13:59:08 +0300
Subject: [PATCH 1/3] feat: add xclim-backed vpd support

---
 .../climate_statistics/statistics.py          |  46 ++-
 climate_tookit/climatology/__init__.py        |   8 +
 climate_tookit/climatology/vpd.py             | 261 ++++++++++++++++++
 .../compare_periods/ensemble_periods.py       |   6 +-
 climate_tookit/compare_periods/periods.py     |  14 +-
 tests/test_compare_periods_baseline.py        |  11 +
 tests/test_statistics_source_policy.py        |  42 +++
 tests/test_vpd.py                             |  77 ++++++
 8 files changed, 458 insertions(+), 7 deletions(-)
 create mode 100644 climate_tookit/climatology/vpd.py
 create mode 100644 tests/test_vpd.py

diff --git a/climate_tookit/climate_statistics/statistics.py b/climate_tookit/climate_statistics/statistics.py
index 4210aec..e0d7b0f 100644
--- a/climate_tookit/climate_statistics/statistics.py
+++ b/climate_tookit/climate_statistics/statistics.py
@@ -44,6 +44,7 @@
     compute_monthly_spi,
     list_thi_livestock_profiles,
     resolve_thi_profile,
+    summarize_vpd_period,
 )
 try:
     from climate_tookit.fetch_data.preprocess_data.preprocess_data import preprocess_data
@@ -681,6 +682,16 @@ def _livestock_heat_stress_summary(
         ),
     }
 
+
+def _vpd_summary(df: pd.DataFrame) -> Optional[Dict[str, Any]]:
+    """Summarize VPD when humidity or dewpoint-backed inputs are available."""
+    if df is None or df.empty or 'date' not in df.columns:
+        return None
+    summary = summarize_vpd_period(df)
+    if not summary:
+        return None
+    return summary
+
 def raw_climate_summary(df: pd.DataFrame) -> List[Dict[str, Any]]:
     """
     Compact summary table -- mean / min / max / std per core variable.
@@ -743,6 +754,7 @@ def overall_statistics(
     }
     water_balance_stats.update(shared_wb)
     heat_stress = _livestock_heat_stress_summary(df, thi_profile=thi_profile)
+    vpd = _vpd_summary(df)
 
     result = {
         'total_days': int(len(df)),
@@ -764,6 +776,8 @@ def overall_statistics(
         },
         'water_balance': water_balance_stats,
     }
+    if vpd:
+        result['vpd'] = vpd
     if heat_stress:
         result['livestock_heat_stress'] = heat_stress
     return result
@@ -851,6 +865,7 @@ def season_statistics(
         )
 
     heat_stress = _livestock_heat_stress_summary(sdf, thi_profile=thi_profile)
+    vpd = _vpd_summary(sdf)
 
     result = {
         'onset':       onset_ts.strftime('%Y-%m-%d'),
@@ -872,6 +887,8 @@ def season_statistics(
         'water_balance': water_balance_stats,
         'water_balance_methodology': water_balance_methodology,
     }
+    if vpd:
+        result['vpd'] = vpd
     if heat_stress:
         result['livestock_heat_stress'] = heat_stress
     return result
@@ -1110,7 +1127,7 @@ def ltm_season_summary(
             'length_days_avg': _avg([s.get('length_days') for s in seasons], 1),
         }
 
-        for cat in ('precipitation', 'temperature', 'water_balance', 'livestock_heat_stress'):
+        for cat in ('precipitation', 'temperature', 'water_balance', 'vpd', 'livestock_heat_stress'):
             pool: Dict[str, List[float]] = {}
             for s in seasons:
                 for k, v in (s.get(cat) or {}).items():
@@ -1128,9 +1145,21 @@ def ltm_season_summary(
                             and (s.get(cat) or {}).get(meta_key) is not None
                         ),
                         None,
-                    )
+                        )
                     if meta_value is not None:
                         block.setdefault(cat, {})[meta_key] = meta_value
+            if cat == "vpd":
+                method_value = next(
+                    (
+                        (s.get(cat) or {}).get("method")
+                        for s in seasons
+                        if isinstance(s.get(cat), dict)
+                        and (s.get(cat) or {}).get("method") is not None
+                    ),
+                    None,
+                )
+                if method_value is not None:
+                    block.setdefault(cat, {})["method"] = method_value
 
         ov_pool: Dict[str, Dict[str, List[float]]] = {}
         for s in seasons:
@@ -2301,6 +2330,7 @@ def print_overall_by_season(seasons: List[Dict]) -> None:
             ('temperature',   'Temperature'),
             ('et0',           'ET0'),
             ('water_balance', 'Water Balance'),
+            ('vpd', 'VPD'),
             ('livestock_heat_stress', 'Livestock THI'),
         ]:
             if var_key not in stats:
@@ -2343,6 +2373,12 @@ def print_seasons(seasons: List[Dict]) -> None:
               f"mean_tavg={t['mean_tavg']}°C | "
               f"max_tmax={t['max_tmax']}°C | "
               f"min_tmin={t['min_tmin']}°C")
+        v = s.get('vpd') or {}
+        if v:
+            print(f"    VPD           : "
+                  f"mean_vpd={v.get('mean_vpd_kpa')} kPa | "
+                  f"max_vpd={v.get('max_vpd_kpa')} kPa | "
+                  f"method={v.get('method')}")
         h = s.get('livestock_heat_stress') or {}
         if h:
             print(f"    {_thi_profile_label(h)} : "
@@ -2426,6 +2462,12 @@ def print_ltm_by_season(ltm: Dict[str, Any],
                   f"mean_tavg={t.get('mean_tavg')}°C | "
                   f"max_tmax={t.get('max_tmax')}°C | "
                   f"min_tmin={t.get('min_tmin')}°C")
+        v = w.get('vpd') or {}
+        if v:
+            print(f"    VPD           : "
+                  f"mean_vpd={v.get('mean_vpd_kpa')} kPa | "
+                  f"max_vpd={v.get('max_vpd_kpa')} kPa | "
+                  f"method={v.get('method')}")
         h = w.get('livestock_heat_stress') or {}
         if h:
             print(f"    {_thi_profile_label(h)} : "
diff --git a/climate_tookit/climatology/__init__.py b/climate_tookit/climatology/__init__.py
index f4a6841..0dc56ff 100644
--- a/climate_tookit/climatology/__init__.py
+++ b/climate_tookit/climatology/__init__.py
@@ -14,6 +14,7 @@
     "compute_monthly_spei",
     "compute_monthly_spi",
     "compute_daily_thi",
+    "compute_daily_vpd",
     "DEFAULT_LIVESTOCK_CLIMATE_PROFILE",
     "DEFAULT_LIVESTOCK_TYPE",
     "describe_thi_source_support",
@@ -22,6 +23,7 @@
     "prepare_monthly_climatic_water_balance",
     "prepare_monthly_precipitation_totals",
     "resolve_thi_profile",
+    "summarize_vpd_period",
     "summarize_thi_periods",
     "assess_xclim_precip_annual_readiness",
     "compare_xclim_precip_indices",
@@ -49,6 +51,12 @@ def __getattr__(name: str):
     }:
         module = import_module(".heat_stress", __name__)
         return getattr(module, name)
+    if name in {
+        "compute_daily_vpd",
+        "summarize_vpd_period",
+    }:
+        module = import_module(".vpd", __name__)
+        return getattr(module, name)
     if name in {
         "compute_monthly_spei",
         "compute_monthly_spi",
diff --git a/climate_tookit/climatology/vpd.py b/climate_tookit/climatology/vpd.py
new file mode 100644
index 0000000..456f317
--- /dev/null
+++ b/climate_tookit/climatology/vpd.py
@@ -0,0 +1,261 @@
+"""Vapour pressure deficit helpers built on xclim where possible."""
+
+from __future__ import annotations
+
+from importlib.util import find_spec
+from typing import Any, Iterable, Optional, Sequence
+
+import pandas as pd
+
+XCLIM_AVAILABLE = bool(find_spec("xarray")) and bool(find_spec("xclim"))
+
+_TMEAN_COLS = ("mean_temperature", "tavg", "temperature_c", "tas")
+_TMAX_COLS = ("max_temperature", "tmax", "tasmax")
+_TMIN_COLS = ("min_temperature", "tmin", "tasmin")
+_HUMIDITY_COLS = ("humidity", "relative_humidity", "hurs", "RHAV", "rh")
+_DEWPOINT_COLS = (
+    "dewpoint",
+    "dewpoint_temperature",
+    "dewpoint_temperature_2m",
+    "tdew",
+)
+
+
+def _detect_column(frame: pd.DataFrame, candidates: Sequence[str]) -> Optional[str]:
+    return next((column for column in candidates if column in frame.columns), None)
+
+
+def _ensure_xclim():
+    if not XCLIM_AVAILABLE:
+        raise RuntimeError("xclim is not installed in this environment.")
+    import xarray as xr  # local import to avoid import-time side effects
+    import xclim.indices as xci
+
+    return xr, xci
+
+
+def _to_numeric_celsius(series: pd.Series) -> pd.Series:
+    values = pd.to_numeric(series, errors="coerce")
+    clean = values.dropna()
+    if not clean.empty and float(clean.mean()) > 100.0:
+        return values - 273.15
+    return values
+
+
+def _build_data_array(dates: pd.Series, values: pd.Series, *, units: str):
+    xr, _ = _ensure_xclim()
+    return xr.DataArray(
+        pd.to_numeric(values, errors="coerce").astype(float).to_numpy(),
+        coords={"time": pd.to_datetime(dates).to_numpy()},
+        dims="time",
+        attrs={"units": units},
+    )
+
+
+def _resolve_temperature(
+    frame: pd.DataFrame,
+    *,
+    tmean_col: Optional[str] = None,
+    tmax_col: Optional[str] = None,
+    tmin_col: Optional[str] = None,
+) -> tuple[pd.Series, str]:
+    mean_column = tmean_col or _detect_column(frame, _TMEAN_COLS)
+    if mean_column:
+        return _to_numeric_celsius(frame[mean_column]), mean_column
+
+    max_column = tmax_col or _detect_column(frame, _TMAX_COLS)
+    min_column = tmin_col or _detect_column(frame, _TMIN_COLS)
+    if max_column and min_column:
+        tmax = _to_numeric_celsius(frame[max_column])
+        tmin = _to_numeric_celsius(frame[min_column])
+        return (tmax + tmin) / 2.0, "derived_from_tmax_tmin"
+
+    raise ValueError(
+        "VPD workflow needs mean temperature or both tmax and tmin."
+    )
+
+
+def _resolve_humidity(
+    frame: pd.DataFrame,
+    *,
+    humidity_col: Optional[str] = None,
+) -> tuple[pd.Series, str]:
+    column = humidity_col or _detect_column(frame, _HUMIDITY_COLS)
+    if not column:
+        raise ValueError("Missing required humidity column for VPD workflow.")
+    humidity = pd.to_numeric(frame[column], errors="coerce")
+    valid = humidity.between(0.0, 100.0) | humidity.isna()
+    if not bool(valid.all()):
+        raise ValueError("Humidity values for VPD must be between 0 and 100 percent.")
+    return humidity, column
+
+
+def _resolve_dewpoint(
+    frame: pd.DataFrame,
+    *,
+    dewpoint_col: Optional[str] = None,
+) -> tuple[pd.Series, str]:
+    column = dewpoint_col or _detect_column(frame, _DEWPOINT_COLS)
+    if not column:
+        raise ValueError("Missing required dewpoint column for VPD workflow.")
+    return _to_numeric_celsius(frame[column]), column
+
+
+def _threshold_columns(vpd_kpa: pd.Series, thresholds_kpa: Iterable[float]) -> dict[str, pd.Series]:
+    columns: dict[str, pd.Series] = {}
+    for threshold in thresholds_kpa:
+        label = str(threshold).replace(".", "p")
+        columns[f"days_above_{label}_kpa"] = (vpd_kpa > float(threshold)).astype(float)
+    return columns
+
+
+def compute_daily_vpd(
+    frame: pd.DataFrame,
+    *,
+    date_col: str = "date",
+    method: str = "auto",
+    humidity_col: Optional[str] = None,
+    dewpoint_col: Optional[str] = None,
+    tmean_col: Optional[str] = None,
+    tmax_col: Optional[str] = None,
+    tmin_col: Optional[str] = None,
+    xclim_method: str = "sonntag90",
+) -> pd.DataFrame:
+    """
+    Compute daily VPD in kPa.
+
+    Canonical paths use xclim:
+    - relative humidity path via xclim.indices.vapor_pressure_deficit
+    - dewpoint path via xclim.indices.saturation_vapor_pressure
+
+    Atlas proxy is optional and explicitly labeled.
+    """
+    if date_col not in frame.columns:
+        raise ValueError(f"Missing required date column: {date_col}")
+
+    xr, xci = _ensure_xclim()
+    out = frame.copy()
+    out[date_col] = pd.to_datetime(out[date_col])
+    out = out.sort_values(date_col).reset_index(drop=True)
+
+    resolved_method = method
+    if method == "auto":
+        if humidity_col or _detect_column(out, _HUMIDITY_COLS):
+            resolved_method = "relative_humidity"
+        elif dewpoint_col or _detect_column(out, _DEWPOINT_COLS):
+            resolved_method = "dewpoint"
+        else:
+            raise ValueError(
+                "VPD auto mode needs humidity or dewpoint inputs. "
+                "Atlas proxy must be requested explicitly."
+            )
+
+    temperature_c, temperature_source = _resolve_temperature(
+        out,
+        tmean_col=tmean_col,
+        tmax_col=tmax_col,
+        tmin_col=tmin_col,
+    )
+    tas_da = _build_data_array(out[date_col], temperature_c, units="degC")
+
+    metadata: dict[str, Any] = {
+        "backend": "xclim",
+        "temperature_source": temperature_source,
+        "units": "kPa",
+    }
+
+    if resolved_method == "relative_humidity":
+        humidity, humidity_source = _resolve_humidity(out, humidity_col=humidity_col)
+        hurs_da = xr.DataArray(
+            humidity.astype(float).to_numpy(),
+            coords={"time": pd.to_datetime(out[date_col]).to_numpy()},
+            dims="time",
+            attrs={"units": "%"},
+        )
+        vpd_pa = xci.vapor_pressure_deficit(
+            tas_da,
+            hurs_da,
+            method=xclim_method,
+        )
+        metadata.update(
+            {
+                "path": "relative_humidity",
+                "humidity_source_column": humidity_source,
+                "saturation_vapor_pressure_method": xclim_method,
+            }
+        )
+    elif resolved_method == "dewpoint":
+        dewpoint_c, dewpoint_source = _resolve_dewpoint(out, dewpoint_col=dewpoint_col)
+        dew_da = _build_data_array(out[date_col], dewpoint_c, units="degC")
+        saturation = xci.saturation_vapor_pressure(tas_da, method=xclim_method)
+        actual = xci.saturation_vapor_pressure(dew_da, method=xclim_method)
+        vpd_pa = saturation - actual
+        metadata.update(
+            {
+                "path": "dewpoint",
+                "dewpoint_source_column": dewpoint_source,
+                "saturation_vapor_pressure_method": xclim_method,
+            }
+        )
+    elif resolved_method == "atlas_proxy":
+        max_column = tmax_col or _detect_column(out, _TMAX_COLS)
+        min_column = tmin_col or _detect_column(out, _TMIN_COLS)
+        if not (max_column and min_column):
+            raise ValueError("Atlas VPD proxy needs both tmax and tmin.")
+        tmax_c = _to_numeric_celsius(out[max_column])
+        tmin_c = _to_numeric_celsius(out[min_column])
+        tmax_da = _build_data_array(out[date_col], tmax_c, units="degC")
+        tmin_da = _build_data_array(out[date_col], tmin_c, units="degC")
+        es_tmax = xci.saturation_vapor_pressure(tmax_da, method=xclim_method)
+        es_tmin = xci.saturation_vapor_pressure(tmin_da, method=xclim_method)
+        vpd_pa = 0.7 * (es_tmax - es_tmin)
+        metadata.update(
+            {
+                "path": "atlas_proxy",
+                "proxy_formula": "0.7 * (es(tmax) - es(tmin))",
+                "tmax_source_column": max_column,
+                "tmin_source_column": min_column,
+                "saturation_vapor_pressure_method": xclim_method,
+            }
+        )
+    else:
+        raise ValueError(
+            "Unsupported VPD method. Use auto, relative_humidity, dewpoint, or atlas_proxy."
+        )
+
+    vpd_series = pd.Series(vpd_pa.values, index=out.index, dtype=float).clip(lower=0.0) / 1000.0
+    out["temperature_c"] = temperature_c
+    out["vpd_kpa"] = vpd_series
+    out.attrs["vpd_metadata"] = metadata
+    return out
+
+
+def summarize_vpd_period(
+    frame: pd.DataFrame,
+    *,
+    thresholds_kpa: Sequence[float] = (),
+    **kwargs,
+) -> Optional[dict[str, Any]]:
+    try:
+        vpd_df = compute_daily_vpd(frame, **kwargs)
+    except (RuntimeError, ValueError):
+        return None
+
+    if "vpd_kpa" not in vpd_df.columns or vpd_df["vpd_kpa"].notna().sum() == 0:
+        return None
+
+    summary: dict[str, Any] = {
+        "mean_vpd_kpa": round(float(vpd_df["vpd_kpa"].mean()), 3),
+        "max_vpd_kpa": round(float(vpd_df["vpd_kpa"].max()), 3),
+        "method": vpd_df.attrs.get("vpd_metadata", {}).get("path"),
+    }
+    for column_name, mask in _threshold_columns(vpd_df["vpd_kpa"], thresholds_kpa).items():
+        summary[column_name] = int(mask.sum())
+    return summary
+
+
+__all__ = [
+    "XCLIM_AVAILABLE",
+    "compute_daily_vpd",
+    "summarize_vpd_period",
+]
diff --git a/climate_tookit/compare_periods/ensemble_periods.py b/climate_tookit/compare_periods/ensemble_periods.py
index 8fdf862..d94c24a 100644
--- a/climate_tookit/compare_periods/ensemble_periods.py
+++ b/climate_tookit/compare_periods/ensemble_periods.py
@@ -863,13 +863,13 @@ def _build_focal_summary(location:     Tuple[float, float],
         windows = []
         for sn in sorted(grp):
             agg = _agg_seasons(grp[sn])
-            block = {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "livestock_heat_stress")
+            block = {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "vpd", "livestock_heat_stress")
                      if isinstance(agg.get(c), dict)}
             windows.append({"season_number": sn, "block": block})
         season_summary: Dict[str, Any] = {"windows": windows}
     else:
         agg = _agg_seasons(seasons)
-        block = {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "livestock_heat_stress")
+        block = {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "vpd", "livestock_heat_stress")
                  if isinstance(agg.get(c), dict)}
         season_summary = {"block": block}
     season_summary = _merge_seasonal_spei_into_summary(
@@ -904,7 +904,7 @@ def _build_focal_summary(location:     Tuple[float, float],
 def _season_block(seasons: List[Dict]) -> Dict[str, Any]:
     """Reduce a list of season rows to {cat: {metric: number}} for the comparable cats."""
     agg = _agg_seasons(seasons)
-    return {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "livestock_heat_stress")
+    return {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "vpd", "livestock_heat_stress")
             if isinstance(agg.get(c), dict)}
 
 def _mean_2level(maps: List[Dict[str, Dict[str, Any]]], round_n: int = 2) -> Dict[str, Any]:
diff --git a/climate_tookit/compare_periods/periods.py b/climate_tookit/compare_periods/periods.py
index 58adf11..75815b7 100644
--- a/climate_tookit/compare_periods/periods.py
+++ b/climate_tookit/compare_periods/periods.py
@@ -35,7 +35,7 @@
 )
 from climate_tookit.crop_calendar.ggcmi import CALENDAR_SYSTEM_CHOICES
 
-CATEGORIES   = ["precipitation", "temperature", "et0", "water_balance", "spei"]
+CATEGORIES   = ["precipitation", "temperature", "et0", "water_balance", "vpd", "spei"]
 ANNUALIZABLE = {
     "precipitation": ["total_mm", "rainy_days", "dry_days"],
     "et0":           ["total_mm"],
@@ -749,7 +749,7 @@ def _agg_seasons(seasons: List[Dict[str, Any]]) -> Dict[str, Any]:
         return {"_n": 0}
     sums: Dict[str, List[float]] = {}
     for s in seasons:
-        for cat in ("precipitation", "temperature", "water_balance", "livestock_heat_stress"):
+        for cat in ("precipitation", "temperature", "water_balance", "vpd", "livestock_heat_stress"):
             for m, v in (s.get(cat) or {}).items():
                 if _is_num(v):
                     sums.setdefault(f"{cat}.{m}", []).append(float(v))
@@ -769,6 +769,16 @@ def _agg_seasons(seasons: List[Dict[str, Any]]) -> Dict[str, Any]:
         for meta_key in ("livestock_type", "livestock_label", "climate_profile"):
             if first_heat.get(meta_key) is not None:
                 out.setdefault("livestock_heat_stress", {})[meta_key] = first_heat.get(meta_key)
+    first_vpd = next(
+        (
+            s.get("vpd")
+            for s in seasons
+            if isinstance(s.get("vpd"), dict)
+        ),
+        None,
+    )
+    if isinstance(first_vpd, dict) and first_vpd.get("method") is not None:
+        out.setdefault("vpd", {})["method"] = first_vpd.get("method")
     methodology = _summarize_methodology_rows(seasons)
     if methodology:
         out["water_balance_methodology"] = methodology
diff --git a/tests/test_compare_periods_baseline.py b/tests/test_compare_periods_baseline.py
index d8bff21..c0350db 100644
--- a/tests/test_compare_periods_baseline.py
+++ b/tests/test_compare_periods_baseline.py
@@ -99,6 +99,17 @@ def test_print_report_renders_missing_pct_as_na(self):
         self.assertNotIn("Category", stdout.getvalue())
         self.assertIn("metric", stdout.getvalue())
 
+    def test_diff_block_includes_vpd_family(self):
+        diff = cp._diff_block(
+            {"vpd": {"mean_vpd_kpa": 1.8, "max_vpd_kpa": 3.2}},
+            {"vpd": {"mean_vpd_kpa": 1.2, "max_vpd_kpa": 2.5}},
+            "focal",
+            "baseline_avg",
+        )
+
+        self.assertIn("vpd", diff)
+        self.assertAlmostEqual(0.6, diff["vpd"]["mean_vpd_kpa"]["diff"], places=6)
+
     def test_print_report_renders_spei_block(self):
         payload = {
             "focal_year": 2019,
diff --git a/tests/test_statistics_source_policy.py b/tests/test_statistics_source_policy.py
index 485cc34..2b0d28e 100644
--- a/tests/test_statistics_source_policy.py
+++ b/tests/test_statistics_source_policy.py
@@ -143,6 +143,25 @@ def test_overall_statistics_adds_livestock_thi_when_humidity_present(self):
         self.assertGreater(heat["max_thi"], heat["mean_thi"])
         self.assertEqual("cattle_dairy", heat["livestock_type"])
 
+    def test_overall_statistics_adds_vpd_when_humidity_present(self):
+        df = pd.DataFrame(
+            {
+                "date": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"]),
+                "precip": [2.0, 0.0, 1.0],
+                "tmax": [30.0, 32.0, 35.0],
+                "tmin": [20.0, 22.0, 24.0],
+                "humidity": [50.0, 80.0, 85.0],
+                "ET0_mm_day": [4.0, 4.0, 4.0],
+                "water_balance": [-2.0, -4.0, -3.0],
+            }
+        )
+
+        result = stats.overall_statistics(df)
+
+        self.assertIn("vpd", result)
+        self.assertEqual("relative_humidity", result["vpd"]["method"])
+        self.assertGreater(result["vpd"]["max_vpd_kpa"], result["vpd"]["mean_vpd_kpa"])
+
     def test_season_statistics_includes_shared_root_zone_metrics(self):
         df = pd.DataFrame(
             {
@@ -239,6 +258,29 @@ def test_season_statistics_skips_livestock_thi_without_humidity(self):
 
         self.assertNotIn("livestock_heat_stress", result)
 
+    def test_season_statistics_adds_vpd_when_humidity_present(self):
+        df = pd.DataFrame(
+            {
+                "date": pd.to_datetime(["2020-03-01", "2020-03-02"]),
+                "precip": [3.0, 1.0],
+                "tmax": [29.0, 30.0],
+                "tmin": [18.0, 19.0],
+                "humidity": [60.0, 55.0],
+                "ET0_mm_day": [4.0, 4.0],
+                "water_balance": [-1.0, -3.0],
+            }
+        )
+        season = {
+            "onset": pd.Timestamp("2020-03-01"),
+            "cessation": pd.Timestamp("2020-03-02"),
+            "length_days": 2,
+        }
+
+        result = stats.season_statistics(df, season, season_df=df)
+
+        self.assertIn("vpd", result)
+        self.assertEqual("relative_humidity", result["vpd"]["method"])
+
     def test_compile_season_results_reuses_main_window_root_zone_summary(self):
         df = pd.DataFrame(
             {
diff --git a/tests/test_vpd.py b/tests/test_vpd.py
new file mode 100644
index 0000000..85e2d4d
--- /dev/null
+++ b/tests/test_vpd.py
@@ -0,0 +1,77 @@
+import unittest
+
+import pandas as pd
+
+from climate_tookit.climatology import compute_daily_vpd, summarize_vpd_period
+
+
+class VpdTests(unittest.TestCase):
+    def test_compute_daily_vpd_uses_relative_humidity_path(self):
+        frame = pd.DataFrame(
+            {
+                "date": pd.to_datetime(["2020-01-01", "2020-01-02"]),
+                "tmax": [30.0, 32.0],
+                "tmin": [20.0, 22.0],
+                "humidity": [50.0, 80.0],
+            }
+        )
+
+        result = compute_daily_vpd(frame)
+
+        self.assertIn("vpd_kpa", result.columns)
+        self.assertAlmostEqual(25.0, result.loc[0, "temperature_c"], places=6)
+        self.assertGreater(result.loc[0, "vpd_kpa"], result.loc[1, "vpd_kpa"])
+        self.assertEqual("relative_humidity", result.attrs["vpd_metadata"]["path"])
+        self.assertEqual("derived_from_tmax_tmin", result.attrs["vpd_metadata"]["temperature_source"])
+
+    def test_compute_daily_vpd_uses_dewpoint_path(self):
+        frame = pd.DataFrame(
+            {
+                "date": pd.to_datetime(["2020-01-01", "2020-01-02"]),
+                "mean_temperature": [25.0, 25.0],
+                "dewpoint_temperature": [25.0, 20.0],
+            }
+        )
+
+        result = compute_daily_vpd(frame, method="dewpoint")
+
+        self.assertAlmostEqual(0.0, result.loc[0, "vpd_kpa"], places=6)
+        self.assertGreater(result.loc[1, "vpd_kpa"], 0.0)
+        self.assertEqual("dewpoint", result.attrs["vpd_metadata"]["path"])
+
+    def test_compute_daily_vpd_atlas_proxy_is_explicitly_labeled(self):
+        frame = pd.DataFrame(
+            {
+                "date": pd.to_datetime(["2020-01-01"]),
+                "tmax": [32.0],
+                "tmin": [20.0],
+            }
+        )
+
+        result = compute_daily_vpd(frame, method="atlas_proxy")
+
+        self.assertGreater(result.loc[0, "vpd_kpa"], 0.0)
+        self.assertEqual("atlas_proxy", result.attrs["vpd_metadata"]["path"])
+        self.assertIn("proxy_formula", result.attrs["vpd_metadata"])
+
+    def test_summarize_vpd_period_counts_thresholds(self):
+        frame = pd.DataFrame(
+            {
+                "date": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"]),
+                "tmax": [30.0, 34.0, 28.0],
+                "tmin": [20.0, 24.0, 18.0],
+                "humidity": [90.0, 35.0, 75.0],
+            }
+        )
+
+        result = summarize_vpd_period(frame, thresholds_kpa=(1.0,))
+
+        self.assertIn("mean_vpd_kpa", result)
+        self.assertIn("max_vpd_kpa", result)
+        self.assertEqual("relative_humidity", result["method"])
+        self.assertIn("days_above_1p0_kpa", result)
+        self.assertGreaterEqual(result["days_above_1p0_kpa"], 1)
+
+
+if __name__ == "__main__":
+    unittest.main()

From c2307e98f6c01dd7a848fec0fb290af4459f9359 Mon Sep 17 00:00:00 2001
From: Pete Steward <p.steward@cgiar.org>
Date: Wed, 24 Jun 2026 14:22:00 +0300
Subject: [PATCH 2/3] refactor: drop non-canonical vpd proxy

---
 climate_tookit/climatology/vpd.py | 28 ++--------------------------
 tests/test_vpd.py                 |  9 +++------
 2 files changed, 5 insertions(+), 32 deletions(-)

diff --git a/climate_tookit/climatology/vpd.py b/climate_tookit/climatology/vpd.py
index 456f317..357b99c 100644
--- a/climate_tookit/climatology/vpd.py
+++ b/climate_tookit/climatology/vpd.py
@@ -127,8 +127,6 @@ def compute_daily_vpd(
     Canonical paths use xclim:
     - relative humidity path via xclim.indices.vapor_pressure_deficit
     - dewpoint path via xclim.indices.saturation_vapor_pressure
-
-    Atlas proxy is optional and explicitly labeled.
     """
     if date_col not in frame.columns:
         raise ValueError(f"Missing required date column: {date_col}")
@@ -146,8 +144,7 @@ def compute_daily_vpd(
             resolved_method = "dewpoint"
         else:
             raise ValueError(
-                "VPD auto mode needs humidity or dewpoint inputs. "
-                "Atlas proxy must be requested explicitly."
+                "VPD auto mode needs humidity or dewpoint inputs."
             )
 
     temperature_c, temperature_source = _resolve_temperature(
@@ -197,30 +194,9 @@ def compute_daily_vpd(
                 "saturation_vapor_pressure_method": xclim_method,
             }
         )
-    elif resolved_method == "atlas_proxy":
-        max_column = tmax_col or _detect_column(out, _TMAX_COLS)
-        min_column = tmin_col or _detect_column(out, _TMIN_COLS)
-        if not (max_column and min_column):
-            raise ValueError("Atlas VPD proxy needs both tmax and tmin.")
-        tmax_c = _to_numeric_celsius(out[max_column])
-        tmin_c = _to_numeric_celsius(out[min_column])
-        tmax_da = _build_data_array(out[date_col], tmax_c, units="degC")
-        tmin_da = _build_data_array(out[date_col], tmin_c, units="degC")
-        es_tmax = xci.saturation_vapor_pressure(tmax_da, method=xclim_method)
-        es_tmin = xci.saturation_vapor_pressure(tmin_da, method=xclim_method)
-        vpd_pa = 0.7 * (es_tmax - es_tmin)
-        metadata.update(
-            {
-                "path": "atlas_proxy",
-                "proxy_formula": "0.7 * (es(tmax) - es(tmin))",
-                "tmax_source_column": max_column,
-                "tmin_source_column": min_column,
-                "saturation_vapor_pressure_method": xclim_method,
-            }
-        )
     else:
         raise ValueError(
-            "Unsupported VPD method. Use auto, relative_humidity, dewpoint, or atlas_proxy."
+            "Unsupported VPD method. Use auto, relative_humidity, or dewpoint."
         )
 
     vpd_series = pd.Series(vpd_pa.values, index=out.index, dtype=float).clip(lower=0.0) / 1000.0
diff --git a/tests/test_vpd.py b/tests/test_vpd.py
index 85e2d4d..6e66b8e 100644
--- a/tests/test_vpd.py
+++ b/tests/test_vpd.py
@@ -39,7 +39,7 @@ def test_compute_daily_vpd_uses_dewpoint_path(self):
         self.assertGreater(result.loc[1, "vpd_kpa"], 0.0)
         self.assertEqual("dewpoint", result.attrs["vpd_metadata"]["path"])
 
-    def test_compute_daily_vpd_atlas_proxy_is_explicitly_labeled(self):
+    def test_compute_daily_vpd_auto_requires_moisture_input(self):
         frame = pd.DataFrame(
             {
                 "date": pd.to_datetime(["2020-01-01"]),
@@ -48,11 +48,8 @@ def test_compute_daily_vpd_atlas_proxy_is_explicitly_labeled(self):
             }
         )
 
-        result = compute_daily_vpd(frame, method="atlas_proxy")
-
-        self.assertGreater(result.loc[0, "vpd_kpa"], 0.0)
-        self.assertEqual("atlas_proxy", result.attrs["vpd_metadata"]["path"])
-        self.assertIn("proxy_formula", result.attrs["vpd_metadata"])
+        with self.assertRaisesRegex(ValueError, "humidity or dewpoint inputs"):
+            compute_daily_vpd(frame)
 
     def test_summarize_vpd_period_counts_thresholds(self):
         frame = pd.DataFrame(

From c55eafaccd72779879038bbf480901382c2c2010 Mon Sep 17 00:00:00 2001
From: Pete Steward <p.steward@cgiar.org>
Date: Wed, 24 Jun 2026 14:28:15 +0300
Subject: [PATCH 3/3] docs: note chc alignment for vpd

---
 README.md                         |  8 ++++++--
 climate_tookit/climatology/vpd.py | 11 ++++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 3530eea..decc5f0 100644
--- a/README.md
+++ b/README.md
@@ -527,13 +527,16 @@ Important source notes:
 - `chirts` is temperature-only, so climatology will omit precipitation outputs.
 
 SPEI and xclim helpers are library functions, not standalone console scripts.
-Use them from Python after fetching/preprocessing climate data:
+Use them from Python after fetching/preprocessing climate data. VPD helpers use
+`xclim` for thermodynamic calculation and prefer CHC-consistent
+moisture-informed inputs (`relative_humidity` or `dewpoint`) when available:
 
 ```python
 from climate_tookit.climatology.spei import (
     compute_monthly_spei,
     prepare_monthly_climatic_water_balance,
 )
+from climate_tookit.climatology import summarize_vpd_period
 from climate_tookit.climatology.xclim_reference import (
     compute_xclim_precip_indices,
 )
@@ -541,6 +544,7 @@ from climate_tookit.climatology.xclim_reference import (
 monthly_balance = prepare_monthly_climatic_water_balance(df, lat=-1.286)
 spei_12 = compute_monthly_spei(monthly_balance, scale=12)
 xclim_precip = compute_xclim_precip_indices(df)
+vpd = summarize_vpd_period(df)
 ```
 
 ---
@@ -761,7 +765,7 @@ Core current-state modules:
 | `compare_datasets` | source-vs-source comparison workflow | shared fetch pipeline |
 | `weather_station` | station discovery, ingestion, selection, station-vs-grid validation | NOAA/custom station inputs, gridded fetch layer |
 | `crop_calendar` | GGCMI crop calendar lookups and presets | `season_analysis`, `climate_statistics`, `compare_periods` |
-| `climatology` | SPEI and xclim-backed climatology helpers | `climate_statistics`, `compare_periods`, `weather_station` |
+| `climatology` | SPEI and xclim-backed climatology helpers, including CHC-aligned moisture-informed VPD support | `climate_statistics`, `compare_periods`, `weather_station` |
 
 Notes:
 
diff --git a/climate_tookit/climatology/vpd.py b/climate_tookit/climatology/vpd.py
index 357b99c..72c210a 100644
--- a/climate_tookit/climatology/vpd.py
+++ b/climate_tookit/climatology/vpd.py
@@ -1,4 +1,9 @@
-"""Vapour pressure deficit helpers built on xclim where possible."""
+"""Vapour pressure deficit helpers built on xclim.
+
+Toolkit uses xclim for VPD thermodynamics while preferring moisture-informed
+inputs consistent with CHC workflows where available: relative humidity or
+dewpoint, not temperature-only proxies.
+"""
 
 from __future__ import annotations
 
@@ -127,6 +132,10 @@ def compute_daily_vpd(
     Canonical paths use xclim:
     - relative humidity path via xclim.indices.vapor_pressure_deficit
     - dewpoint path via xclim.indices.saturation_vapor_pressure
+
+    This matches CHC method family direction: moisture-informed derivation from
+    relative humidity or dewpoint-backed actual vapour pressure, not
+    temperature-only proxy estimates.
     """
     if date_col not in frame.columns:
         raise ValueError(f"Missing required date column: {date_col}")