diff --git a/CMEW/app/configure_recipe/bin/configure_recipe.py b/CMEW/app/configure_recipe/bin/configure_recipe.py index 59d6b046..4b5cf1e8 100755 --- a/CMEW/app/configure_recipe/bin/configure_recipe.py +++ b/CMEW/app/configure_recipe/bin/configure_recipe.py @@ -5,7 +5,6 @@ Generate the required user configuration file for ESMValTool. """ import os - import yaml @@ -16,10 +15,11 @@ def main(): The user configuration file is written to the path defined by the environment variable ``USER_CONFIG_PATH``. """ - # Retrieve the values defined in the environment for the - # 'configure_recipe' task. values = retrieve_values_from_task_env() + developer_config_path = create_developer_config_file(values) + values["CONFIG_DEVELOPER_FILE"] = developer_config_path + # Create the contents for the user configuration file using these # values. user_config_file_contents = create_user_config_file(values) @@ -54,6 +54,7 @@ def retrieve_values_from_task_env(): "DRS_OBS4MIPS": os.environ["DRS_OBS4MIPS"], "DRS_OBS6": os.environ["DRS_OBS6"], "MAX_PARALLEL_TASKS": os.environ["MAX_PARALLEL_TASKS"], + "MIP_TABLE_DIR": os.environ.get("MIP_TABLE_DIR", ""), "OUTPUT_DIR": os.environ["OUTPUT_DIR"], "ROOTPATH_ANA4MIPS": os.environ["ROOTPATH_ANA4MIPS"], "ROOTPATH_CMIP3": os.environ["ROOTPATH_CMIP3"], @@ -70,6 +71,62 @@ def retrieve_values_from_task_env(): return values_from_task_env +def create_developer_config_file(values): + share_dir = values["CYLC_WORKFLOW_SHARE_DIR"] + developer_config_path = os.path.join( + share_dir, "etc", "config-developer.yml" + ) + mip_table_dir = values.get("MIP_TABLE_DIR", "").strip() + if not mip_table_dir: + raise KeyError("MIP_TABLE_DIR must be set") + contents = { + "custom": {"cmor_path": os.path.expanduser(mip_table_dir)}, + "ESMVal": { + "cmor_strict": True, + "input_dir": { + "default": "/", + "BADC": "{activity}/{institute}/{dataset}/{exp}/" + "{ensemble}/{mip}/{short_name}/{grid}/{version}", + "DKRZ": "{activity}/{institute}/{dataset}/{exp}/" + "{ensemble}/{mip}/{short_name}/{grid}/{version}", + "ESGF": "{project}/{activity}/{institute}/{dataset}/{exp}/" + "{ensemble}/{mip}/{short_name}/{grid}/{version}", + "ETHZ": "{exp}/{mip}/{short_name}/{dataset}/" + "{ensemble}/{grid}/", + "SYNDA": "{activity}/{institute}/{dataset}/{exp}/" + "{ensemble}/{mip}/{short_name}/{grid}/{version}", + }, + "input_file": "{short_name}_{mip}_{dataset}_{exp}_{ensemble}" + "_{grid}*.nc", + "output_file": "{project}_{dataset}_{mip}_{exp}_{ensemble}" + "_{short_name}_{grid}", + "cmor_type": "CMIP6", + "cmor_default_table_prefix": "GCModelDev_", + }, + "obs4MIPs": { + "cmor_strict": False, + "input_dir": { + "default": "Tier{tier}/{dataset}", + "ESGF": "{project}/{dataset}/{version}", + "RCAST": "/", + "IPSL": "{realm}/{short_name}/{freq}/{grid}/{institute}" + "/{dataset}/{latest_version}", + }, + "input_file": { + "default": "{short_name}_*.nc", + "ESGF": "{short_name}_*.nc", + }, + "output_file": "{project}_{dataset}_{short_name}", + "cmor_type": "CMIP6", + "cmor_path": "obs4mips", + "cmor_default_table_prefix": "obs4MIPs_", + }, + } + os.makedirs(os.path.dirname(developer_config_path), exist_ok=True) + write_yaml(developer_config_path, contents) + return developer_config_path + + def create_user_config_file(values=None): """ Return the contents of the user configuration file. @@ -87,19 +144,14 @@ def create_user_config_file(values=None): if values is None: values = {} + # Developer config is now generated alongside the user config. if "CYLC_WORKFLOW_SHARE_DIR" in values: - config_developer_file = os.path.join( - values["CYLC_WORKFLOW_SHARE_DIR"], - "etc", - "config-developer.yml", - ) esmval = os.path.join( values["CYLC_WORKFLOW_SHARE_DIR"], "work", "GCModelDev", ) else: - config_developer_file = None esmval = None if "MAX_PARALLEL_TASKS" in values: @@ -117,7 +169,9 @@ def create_user_config_file(values=None): # additional datasets, so may need to be configured in the future. user_config_file_contents = { "auxiliary_data_dir": "", - "config_developer_file": config_developer_file, + # CHANGED: point ESMValTool at the developer config we will generate, + # instead of relying on a hard-copied file. + "config_developer_file": values.get("CONFIG_DEVELOPER_FILE"), "download_dir": "", "drs": { "ana4mips": values.get("DRS_ANA4MIPS", None), diff --git a/CMEW/app/configure_recipe/rose-app.conf b/CMEW/app/configure_recipe/rose-app.conf index a9a82f66..69548769 100644 --- a/CMEW/app/configure_recipe/rose-app.conf +++ b/CMEW/app/configure_recipe/rose-app.conf @@ -3,7 +3,6 @@ [command] default=cmew-esmvaltool-env configure_recipe.py - =cp ${CYLC_WORKFLOW_RUN_DIR}/app/configure_recipe/config_files/config-developer.yml ${CYLC_WORKFLOW_SHARE_DIR}/etc/. [file:$CONFIG_DIR] mode=mkdir diff --git a/CMEW/app/configure_standardise/bin/create_request_file.py b/CMEW/app/configure_standardise/bin/create_request_file.py index 35afe0a2..5326b796 100755 --- a/CMEW/app/configure_standardise/bin/create_request_file.py +++ b/CMEW/app/configure_standardise/bin/create_request_file.py @@ -5,13 +5,15 @@ """ Generates the request configuration file from the ESMValTool recipe. -Supports per-run metadata via RUNS_CONFIG_PATH + RUN_LABEL, -while keeping backward compatibility with legacy env vars -MODEL_ID/SUITE_ID/CALENDAR/VARIANT_LABEL. +Supports per-run metadata via RUNS_CONFIG_PATH + RUN_LABEL, while keeping +backward compatibility with legacy env vars MODEL_ID/SUITE_ID/ +CALENDAR/VARIANT_LABEL. Naming requirement: - In ALL modes (legacy and multi-run), set workflow_basename = suite_id so CDDS paths are cdds_. +- MIP_TABLE_DIR must be set and written to common.mip_table_dir. + This must match ESMValTool developer config custom.cmor_path. """ import configparser @@ -86,11 +88,26 @@ def _get_required_env(name: str) -> str: return val +def _get_required_mip_table_dir() -> str: + """ + Return expanded MIP_TABLE_DIR. + + Must match ESMValTool developer config custom.cmor_path. + """ + mip_table_dir = os.environ.get("MIP_TABLE_DIR", "").strip() + if not mip_table_dir: + raise KeyError( + "MIP_TABLE_DIR must be set (must match ESMValTool developer " + "config custom.cmor_path)." + ) + return os.path.expanduser(mip_table_dir) + + def _normalize_run_entry(run_key: str, cfg: Any) -> Dict[str, str]: if not isinstance(cfg, dict): raise ValueError( - f"Runs config entry for '{run_key}' must be an object, \ - got {type(cfg)}" + f"Runs config entry for '{run_key}' must be an object, " + f"got {type(cfg)}" ) model_id = cfg.get("model_id") or cfg.get("MODEL_ID") @@ -110,8 +127,7 @@ def _normalize_run_entry(run_key: str, cfg: Any) -> Dict[str, str]: ] if missing: raise KeyError( - f"Missing keys for run '{run_key}' \ - in runs config: {missing}" + f"Missing keys for run '{run_key}' in runs config: {missing}" ) return { @@ -147,8 +163,8 @@ def _resolve_run_metadata(run_label: str) -> Dict[str, str]: raise KeyError( f"RUN_LABEL='{run_label}' not found as a key in runs config " - f"and did not match any suite_id. Available keys: \ - {sorted(runs_cfg.keys())}" + f"and did not match any suite_id. Available keys: " + f"{sorted(runs_cfg.keys())}" ) # Legacy fallback @@ -161,6 +177,9 @@ def _resolve_run_metadata(run_label: str) -> Dict[str, str]: def create_request() -> configparser.ConfigParser: + # required and must match ESMValTool developer config custom.cmor_path + mip_table_dir = _get_required_mip_table_dir() + start_year = int(_get_required_env("START_YEAR")) number_of_years = int(_get_required_env("NUMBER_OF_YEARS")) end_year = start_year + number_of_years @@ -201,9 +220,7 @@ def create_request() -> configparser.ConfigParser: request["common"] = { "external_plugin": "", "external_plugin_location": "", - "mip_table_dir": os.path.expanduser( - "~cdds/etc/mip_tables/GCModelDev/0.0.25" - ), + "mip_table_dir": mip_table_dir, "mode": "relaxed", "package": "round-1", "root_proc_dir": _get_required_env("ROOT_PROC_DIR"), diff --git a/CMEW/app/configure_standardise/bin/create_variables_file.py b/CMEW/app/configure_standardise/bin/create_variables_file.py deleted file mode 100755 index 7ab4ac44..00000000 --- a/CMEW/app/configure_standardise/bin/create_variables_file.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python -# (C) Crown Copyright 2024-2026, Met Office. -# The LICENSE.md file contains full licensing details. -""" -Generates the variables.txt file from the ESMValTool recipe. -""" -import os - - -def parse_variables_from_recipe(recipe_path): - from esmvalcore.experimental.recipe import Recipe - - """Retrieve variables from ESMValTool recipe. - - * Read the ESMValTool recipe YAML file from the provided ``recipe_path`` - * For each diagnostic defined in the recipe, extract the variables required - for that diagnostic - * For each variable, extract the mip table name - * Output a newline-separated list of variables, with each line formatted - as ``/:`` - - Recipe file snippet:: - - diagnostics: - : - variables: - : - mip: - : - mip: - : - variables: - : - mip: - : - mip: - - Will be formatted as:: - - /: - /: - /: - /: - - Parameters - ---------- - recipe_path : str - Location of the ESMValTool recipe file. - - Returns - ------- - list[str] - List of variables from the ESMValTool recipe, - formatted as ``/:``. - """ - # For now, hard-code stream to apm, this is correct for Amon and Emon mip. - stream = "apm" - recipe = Recipe(recipe_path) - diagnostics = recipe.data["diagnostics"] - formatted_variables = [] - for diagnostic in diagnostics: - variables = diagnostics[diagnostic]["variables"] - for variable in variables: - mip = variables[variable]["mip"] - formatted_variable = f"{mip}/{variable}:{stream}" - if formatted_variable not in formatted_variables: - formatted_variables.append(formatted_variable) - return formatted_variables - - -def write_variables(variables, target_path): - """Write a string of variables to a text file in the installed workflow. - - Parameters - ---------- - variables : list[str] - List of variables to be written to file. - - target_path : str - Location to write the variables file. - """ - variables_str = "\n".join(variables) + "\n" - with open(target_path, "w") as target_file: - target_file.write(variables_str) - - -def main(): - recipe_path = os.environ["RECIPE_PATH"] - variables = parse_variables_from_recipe(recipe_path) - write_variables(variables, os.environ["VARIABLES_PATH"]) - - -if __name__ == "__main__": - main() diff --git a/CMEW/app/configure_standardise/bin/test_create_request_file.py b/CMEW/app/configure_standardise/bin/test_create_request_file.py index 716a788e..231a5e66 100644 --- a/CMEW/app/configure_standardise/bin/test_create_request_file.py +++ b/CMEW/app/configure_standardise/bin/test_create_request_file.py @@ -1,6 +1,8 @@ # (C) Crown Copyright 2024-2026, Met Office. # The LICENSE.md file contains full licensing details. + import os +import pytest from create_request_file import create_request @@ -18,10 +20,16 @@ def test_create_request(monkeypatch): monkeypatch.setenv("VARIABLES_PATH", "/path/to/variables.txt") monkeypatch.setenv("VARIANT_LABEL", "r1i1p1f1") + # required and must match developer config custom.cmor_path + monkeypatch.setenv( + "MIP_TABLE_DIR", "~cdds/etc/mip_tables/GCModelDev/0.0.25" + ) + config = create_request() actual = { section: dict(config.items(section)) for section in config.sections() } + expected = { "metadata": { "branch_method": "no parent", @@ -29,7 +37,12 @@ def test_create_request(monkeypatch): "base_date": "1850-01-01T00:00:00", "experiment_id": "amip", "institution_id": "MOHC", - "license": "GCModelDev model data is licensed under the Open Government License v3 (https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/)", # noqa: E501 + "license": ( + "GCModelDev model data is licensed under " + "the Open Government License v3 " + "(https://www.nationalarchives.gov.uk/doc" + "/open-government-licence/version/3/)" + ), "mip": "ESMVal", "mip_era": "GCModelDev", "model_id": "UKESM1-0-LL", @@ -68,4 +81,23 @@ def test_create_request(monkeypatch): "cylc_args": "--no-detach -v", }, } + assert actual == expected + + +# Added negative test to make regressions more obvious +def test_create_request_requires_mip_table_dir(monkeypatch): + monkeypatch.setenv("START_YEAR", "1993") + monkeypatch.setenv("NUMBER_OF_YEARS", "1") + monkeypatch.setenv("CALENDAR", "360_day") + monkeypatch.setenv("INSTITUTION_ID", "MOHC") + monkeypatch.setenv("MODEL_ID", "UKESM1-0-LL") + monkeypatch.setenv("ROOT_PROC_DIR", "/path/to/proc/dir/") + monkeypatch.setenv("ROOT_DATA_DIR", "/path/to/data/dir/") + monkeypatch.setenv("SUITE_ID", "u-az513") + monkeypatch.setenv("VARIABLES_PATH", "/path/to/variables.txt") + monkeypatch.setenv("VARIANT_LABEL", "r1i1p1f1") + monkeypatch.delenv("MIP_TABLE_DIR", raising=False) + + with pytest.raises(KeyError, match="MIP_TABLE_DIR must be set"): + create_request() diff --git a/CMEW/app/configure_standardise/bin/test_create_variables_file.py b/CMEW/app/create_variables_file/bin/test_create_variables_file.py similarity index 100% rename from CMEW/app/configure_standardise/bin/test_create_variables_file.py rename to CMEW/app/create_variables_file/bin/test_create_variables_file.py diff --git a/CMEW/flow.cylc b/CMEW/flow.cylc index ffc95752..fa1935f9 100644 --- a/CMEW/flow.cylc +++ b/CMEW/flow.cylc @@ -100,6 +100,7 @@ DRS_NATIVE6 = {{ DRS_NATIVE6 }} DRS_OBS4MIPS = {{ DRS_OBS4MIPS }} MAX_PARALLEL_TASKS = {{ MAX_PARALLEL_TASKS }} + MIP_TABLE_DIR = {{ MIP_TABLE_DIR }} ROOTPATH_CMIP3 = {{ ROOTPATH_CMIP3 }} ROOTPATH_CMIP5 = {{ ROOTPATH_CMIP5 }} ROOTPATH_CMIP6 = {{ ROOTPATH_CMIP6 }} @@ -130,6 +131,7 @@ REQUEST_PATH = ${CYLC_WORKFLOW_SHARE_DIR}/etc/request_%(dataset)s.cfg START_YEAR = {{ START_YEAR }} NUMBER_OF_YEARS = {{ NUMBER_OF_YEARS }} + MIP_TABLE_DIR = {{ MIP_TABLE_DIR }} [[standardise_model_data]] inherit = STANDARDISE diff --git a/CMEW/opt/rose-suite-metoffice.conf b/CMEW/opt/rose-suite-metoffice.conf index 23a087c5..5039dded 100644 --- a/CMEW/opt/rose-suite-metoffice.conf +++ b/CMEW/opt/rose-suite-metoffice.conf @@ -1,4 +1,4 @@ -# (C) Crown Copyright 2022-2025, Met Office. +# (C) Crown Copyright 2022-2026, Met Office. # The LICENSE.md file contains full licensing details. [template variables] @@ -6,6 +6,7 @@ CDDS_VERSION="3.2.0" DRS_CMIP6="BADC" DRS_OBS4MIPS="default" ESMVALTOOL_MODULE_NAME="scitools/community/esmvaltool/2.12.0" +MIP_TABLE_DIR="~cdds/etc/mip_tables/GCModelDev/0.0.25" ROOTPATH_CMIP6="/data/users/managecmip/champ/CMIP6" ROOTPATH_OBS4MIPS="/data/users/esmval/ESMValTool/temporary/obs" SITE="metoffice" diff --git a/CMEW/rose-suite.conf b/CMEW/rose-suite.conf index 7a8e60ca..ed265a21 100644 --- a/CMEW/rose-suite.conf +++ b/CMEW/rose-suite.conf @@ -18,6 +18,7 @@ ESMVALTOOL_MODULE_NAME="" INSTITUTION_ID="MOHC" LABEL_FOR_PLOTS="HadGEM3-GC5E N96ORCA1" MAX_PARALLEL_TASKS=4 +MIP_TABLE_DIR="" MODEL_ID="HadGEM3-GC5E-LL" NUMBER_OF_YEARS=1 REF_CALENDAR="360_day"