Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 64 additions & 10 deletions CMEW/app/configure_recipe/bin/configure_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Generate the required user configuration file for ESMValTool.
"""
import os

import yaml


Expand All @@ -16,10 +15,11 @@ def main():
The user configuration file is written to the path defined by
the environment variable ``USER_CONFIG_PATH``.
"""
# Retrieve the values defined in the environment for the
# 'configure_recipe' task.
values = retrieve_values_from_task_env()

developer_config_path = create_developer_config_file(values)
values["CONFIG_DEVELOPER_FILE"] = developer_config_path

# Create the contents for the user configuration file using these
# values.
user_config_file_contents = create_user_config_file(values)
Expand Down Expand Up @@ -54,6 +54,7 @@ def retrieve_values_from_task_env():
"DRS_OBS4MIPS": os.environ["DRS_OBS4MIPS"],
"DRS_OBS6": os.environ["DRS_OBS6"],
"MAX_PARALLEL_TASKS": os.environ["MAX_PARALLEL_TASKS"],
"MIP_TABLE_DIR": os.environ.get("MIP_TABLE_DIR", ""),
"OUTPUT_DIR": os.environ["OUTPUT_DIR"],
"ROOTPATH_ANA4MIPS": os.environ["ROOTPATH_ANA4MIPS"],
"ROOTPATH_CMIP3": os.environ["ROOTPATH_CMIP3"],
Expand All @@ -70,6 +71,62 @@ def retrieve_values_from_task_env():
return values_from_task_env


def create_developer_config_file(values):
    """
    Write the ESMValTool developer configuration file and return its path.

    The file is written to ``etc/config-developer.yml`` underneath the
    directory given by ``values["CYLC_WORKFLOW_SHARE_DIR"]``; the ``etc``
    directory is created if it does not already exist.

    Parameters
    ----------
    values : dict
        Values for the 'configure_recipe' task. Must contain
        ``CYLC_WORKFLOW_SHARE_DIR`` and a non-blank ``MIP_TABLE_DIR``.
        ``MIP_TABLE_DIR`` is stripped of surrounding whitespace and has
        any leading ``~`` expanded before being written as
        ``custom.cmor_path``.

    Returns
    -------
    str
        The path of the developer configuration file that was written.

    Raises
    ------
    KeyError
        If ``CYLC_WORKFLOW_SHARE_DIR`` is missing from ``values``, or if
        ``MIP_TABLE_DIR`` is missing, ``None``, empty or only whitespace.
    """
    share_dir = values["CYLC_WORKFLOW_SHARE_DIR"]
    developer_config_path = os.path.join(
        share_dir, "etc", "config-developer.yml"
    )

    # ``or ""`` covers a key that is present but set to None, so that case
    # reports the same KeyError as a missing or blank value rather than
    # failing with AttributeError on ``None.strip()``.
    mip_table_dir = (values.get("MIP_TABLE_DIR") or "").strip()
    if not mip_table_dir:
        raise KeyError("MIP_TABLE_DIR must be set")

    contents = {
        "custom": {"cmor_path": os.path.expanduser(mip_table_dir)},
        "ESMVal": {
            "cmor_strict": True,
            "input_dir": {
                "default": "/",
                "BADC": "{activity}/{institute}/{dataset}/{exp}/"
                "{ensemble}/{mip}/{short_name}/{grid}/{version}",
                "DKRZ": "{activity}/{institute}/{dataset}/{exp}/"
                "{ensemble}/{mip}/{short_name}/{grid}/{version}",
                "ESGF": "{project}/{activity}/{institute}/{dataset}/{exp}/"
                "{ensemble}/{mip}/{short_name}/{grid}/{version}",
                "ETHZ": "{exp}/{mip}/{short_name}/{dataset}/"
                "{ensemble}/{grid}/",
                "SYNDA": "{activity}/{institute}/{dataset}/{exp}/"
                "{ensemble}/{mip}/{short_name}/{grid}/{version}",
            },
            "input_file": "{short_name}_{mip}_{dataset}_{exp}_{ensemble}"
            "_{grid}*.nc",
            "output_file": "{project}_{dataset}_{mip}_{exp}_{ensemble}"
            "_{short_name}_{grid}",
            "cmor_type": "CMIP6",
            "cmor_default_table_prefix": "GCModelDev_",
        },
        "obs4MIPs": {
            "cmor_strict": False,
            "input_dir": {
                "default": "Tier{tier}/{dataset}",
                "ESGF": "{project}/{dataset}/{version}",
                "RCAST": "/",
                "IPSL": "{realm}/{short_name}/{freq}/{grid}/{institute}"
                "/{dataset}/{latest_version}",
            },
            "input_file": {
                "default": "{short_name}_*.nc",
                "ESGF": "{short_name}_*.nc",
            },
            "output_file": "{project}_{dataset}_{short_name}",
            "cmor_type": "CMIP6",
            "cmor_path": "obs4mips",
            "cmor_default_table_prefix": "obs4MIPs_",
        },
    }

    # Validation is complete; only now touch the filesystem.
    os.makedirs(os.path.dirname(developer_config_path), exist_ok=True)
    write_yaml(developer_config_path, contents)
    return developer_config_path


def create_user_config_file(values=None):
"""
Return the contents of the user configuration file.
Expand All @@ -87,19 +144,14 @@ def create_user_config_file(values=None):
if values is None:
values = {}

# Developer config is now generated alongside the user config.
if "CYLC_WORKFLOW_SHARE_DIR" in values:
config_developer_file = os.path.join(
values["CYLC_WORKFLOW_SHARE_DIR"],
"etc",
"config-developer.yml",
)
esmval = os.path.join(
values["CYLC_WORKFLOW_SHARE_DIR"],
"work",
"GCModelDev",
)
else:
config_developer_file = None
esmval = None

if "MAX_PARALLEL_TASKS" in values:
Expand All @@ -117,7 +169,9 @@ def create_user_config_file(values=None):
# additional datasets, so may need to be configured in the future.
user_config_file_contents = {
"auxiliary_data_dir": "",
"config_developer_file": config_developer_file,
# CHANGED: point ESMValTool at the developer config we will generate,
# instead of relying on a hard-copied file.
"config_developer_file": values.get("CONFIG_DEVELOPER_FILE"),
"download_dir": "",
"drs": {
"ana4mips": values.get("DRS_ANA4MIPS", None),
Expand Down
1 change: 0 additions & 1 deletion CMEW/app/configure_recipe/rose-app.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

[command]
default=cmew-esmvaltool-env configure_recipe.py
=cp ${CYLC_WORKFLOW_RUN_DIR}/app/configure_recipe/config_files/config-developer.yml ${CYLC_WORKFLOW_SHARE_DIR}/etc/.

[file:$CONFIG_DIR]
mode=mkdir
41 changes: 29 additions & 12 deletions CMEW/app/configure_standardise/bin/create_request_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
"""
Generates the request configuration file from the ESMValTool recipe.

Supports per-run metadata via RUNS_CONFIG_PATH + RUN_LABEL,
while keeping backward compatibility with legacy env vars
MODEL_ID/SUITE_ID/CALENDAR/VARIANT_LABEL.
Supports per-run metadata via RUNS_CONFIG_PATH + RUN_LABEL, while keeping
backward compatibility with legacy env vars MODEL_ID/SUITE_ID/
CALENDAR/VARIANT_LABEL.

Naming requirement:
- In ALL modes (legacy and multi-run), set workflow_basename = suite_id
so CDDS paths are cdds_<suite_id>.
- MIP_TABLE_DIR must be set and written to common.mip_table_dir.
This must match ESMValTool developer config custom.cmor_path.
"""

import configparser
Expand Down Expand Up @@ -86,11 +88,26 @@ def _get_required_env(name: str) -> str:
return val


def _get_required_mip_table_dir() -> str:
    """
    Read ``MIP_TABLE_DIR`` from the environment and expand a leading ``~``.

    Surrounding whitespace is removed before the value is checked. The
    result is expected to agree with ``custom.cmor_path`` in the
    ESMValTool developer config.

    Raises
    ------
    KeyError
        If ``MIP_TABLE_DIR`` is unset, empty or only whitespace.
    """
    raw_value = os.environ.get("MIP_TABLE_DIR", "")
    trimmed = raw_value.strip()
    if trimmed:
        return os.path.expanduser(trimmed)
    raise KeyError(
        "MIP_TABLE_DIR must be set (must match ESMValTool developer "
        "config custom.cmor_path)."
    )


def _normalize_run_entry(run_key: str, cfg: Any) -> Dict[str, str]:
if not isinstance(cfg, dict):
raise ValueError(
f"Runs config entry for '{run_key}' must be an object, \
got {type(cfg)}"
f"Runs config entry for '{run_key}' must be an object, "
f"got {type(cfg)}"
)

model_id = cfg.get("model_id") or cfg.get("MODEL_ID")
Expand All @@ -110,8 +127,7 @@ def _normalize_run_entry(run_key: str, cfg: Any) -> Dict[str, str]:
]
if missing:
raise KeyError(
f"Missing keys for run '{run_key}' \
in runs config: {missing}"
f"Missing keys for run '{run_key}' in runs config: {missing}"
)

return {
Expand Down Expand Up @@ -147,8 +163,8 @@ def _resolve_run_metadata(run_label: str) -> Dict[str, str]:

raise KeyError(
f"RUN_LABEL='{run_label}' not found as a key in runs config "
f"and did not match any suite_id. Available keys: \
{sorted(runs_cfg.keys())}"
f"and did not match any suite_id. Available keys: "
f"{sorted(runs_cfg.keys())}"
)

# Legacy fallback
Expand All @@ -161,6 +177,9 @@ def _resolve_run_metadata(run_label: str) -> Dict[str, str]:


def create_request() -> configparser.ConfigParser:
# required and must match ESMValTool developer config custom.cmor_path
mip_table_dir = _get_required_mip_table_dir()

start_year = int(_get_required_env("START_YEAR"))
number_of_years = int(_get_required_env("NUMBER_OF_YEARS"))
end_year = start_year + number_of_years
Expand Down Expand Up @@ -201,9 +220,7 @@ def create_request() -> configparser.ConfigParser:
request["common"] = {
"external_plugin": "",
"external_plugin_location": "",
"mip_table_dir": os.path.expanduser(
"~cdds/etc/mip_tables/GCModelDev/0.0.25"
),
"mip_table_dir": mip_table_dir,
"mode": "relaxed",
"package": "round-1",
"root_proc_dir": _get_required_env("ROOT_PROC_DIR"),
Expand Down
94 changes: 0 additions & 94 deletions CMEW/app/configure_standardise/bin/create_variables_file.py

This file was deleted.

34 changes: 33 additions & 1 deletion CMEW/app/configure_standardise/bin/test_create_request_file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# (C) Crown Copyright 2024-2026, Met Office.
# The LICENSE.md file contains full licensing details.

import os
import pytest

from create_request_file import create_request

Expand All @@ -18,18 +20,29 @@ def test_create_request(monkeypatch):
monkeypatch.setenv("VARIABLES_PATH", "/path/to/variables.txt")
monkeypatch.setenv("VARIANT_LABEL", "r1i1p1f1")

# required and must match developer config custom.cmor_path
monkeypatch.setenv(
"MIP_TABLE_DIR", "~cdds/etc/mip_tables/GCModelDev/0.0.25"
)

config = create_request()
actual = {
section: dict(config.items(section)) for section in config.sections()
}

expected = {
"metadata": {
"branch_method": "no parent",
"calendar": "360_day",
"base_date": "1850-01-01T00:00:00",
"experiment_id": "amip",
"institution_id": "MOHC",
"license": "GCModelDev model data is licensed under the Open Government License v3 (https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/)", # noqa: E501
"license": (
"GCModelDev model data is licensed under "
"the Open Government License v3 "
"(https://www.nationalarchives.gov.uk/doc"
"/open-government-licence/version/3/)"
),
"mip": "ESMVal",
"mip_era": "GCModelDev",
"model_id": "UKESM1-0-LL",
Expand Down Expand Up @@ -68,4 +81,23 @@ def test_create_request(monkeypatch):
"cylc_args": "--no-detach -v",
},
}

assert actual == expected


# Added negative test to make regressions more obvious
def test_create_request_requires_mip_table_dir(monkeypatch):
    """
    Check that ``create_request`` raises ``KeyError`` when the legacy
    environment variables are set but ``MIP_TABLE_DIR`` is not.
    """
    legacy_env = {
        "START_YEAR": "1993",
        "NUMBER_OF_YEARS": "1",
        "CALENDAR": "360_day",
        "INSTITUTION_ID": "MOHC",
        "MODEL_ID": "UKESM1-0-LL",
        "ROOT_PROC_DIR": "/path/to/proc/dir/",
        "ROOT_DATA_DIR": "/path/to/data/dir/",
        "SUITE_ID": "u-az513",
        "VARIABLES_PATH": "/path/to/variables.txt",
        "VARIANT_LABEL": "r1i1p1f1",
    }
    for name, value in legacy_env.items():
        monkeypatch.setenv(name, value)

    # raising=False: do not fail if the variable is already absent.
    monkeypatch.delenv("MIP_TABLE_DIR", raising=False)

    with pytest.raises(KeyError, match="MIP_TABLE_DIR must be set"):
        create_request()
2 changes: 2 additions & 0 deletions CMEW/flow.cylc
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
DRS_NATIVE6 = {{ DRS_NATIVE6 }}
DRS_OBS4MIPS = {{ DRS_OBS4MIPS }}
MAX_PARALLEL_TASKS = {{ MAX_PARALLEL_TASKS }}
MIP_TABLE_DIR = {{ MIP_TABLE_DIR }}
ROOTPATH_CMIP3 = {{ ROOTPATH_CMIP3 }}
ROOTPATH_CMIP5 = {{ ROOTPATH_CMIP5 }}
ROOTPATH_CMIP6 = {{ ROOTPATH_CMIP6 }}
Expand Down Expand Up @@ -130,6 +131,7 @@
REQUEST_PATH = ${CYLC_WORKFLOW_SHARE_DIR}/etc/request_%(dataset)s.cfg
START_YEAR = {{ START_YEAR }}
NUMBER_OF_YEARS = {{ NUMBER_OF_YEARS }}
MIP_TABLE_DIR = {{ MIP_TABLE_DIR }}

[[standardise_model_data<dataset>]]
inherit = STANDARDISE
Expand Down
Loading