From a57b8725a27e62a00086bc1cdd114c66280a9c72 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Wed, 3 Sep 2025 15:48:00 +0000 Subject: [PATCH 1/3] improve auto-select logic and handle missing data --- .../lowcode/common/transformations.py | 2 + .../operator/lowcode/forecast/__main__.py | 20 +++++++--- tests/operators/forecast/test_datasets.py | 40 +++++++++++++++++++ 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/ads/opctl/operator/lowcode/common/transformations.py b/ads/opctl/operator/lowcode/common/transformations.py index b5367fe19..ccd168b08 100644 --- a/ads/opctl/operator/lowcode/common/transformations.py +++ b/ads/opctl/operator/lowcode/common/transformations.py @@ -329,6 +329,8 @@ def build_fforms_meta_features(self, data, target_col=None, group_cols=None): if target_col not in data.columns: raise ValueError(f"Target column '{target_col}' not found in DataFrame") + data[target_col] = data[target_col].fillna(0) + # Check if group_cols are provided and valid if group_cols is not None: if not isinstance(group_cols, list): diff --git a/ads/opctl/operator/lowcode/forecast/__main__.py b/ads/opctl/operator/lowcode/forecast/__main__.py index 57809886d..1b69bd76a 100644 --- a/ads/opctl/operator/lowcode/forecast/__main__.py +++ b/ads/opctl/operator/lowcode/forecast/__main__.py @@ -9,14 +9,13 @@ import sys from typing import Dict, List -import pandas as pd import yaml from ads.opctl import logger from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS from ads.opctl.operator.common.utils import _parse_input_args -from .const import AUTO_SELECT_SERIES +from .const import AUTO_SELECT, AUTO_SELECT_SERIES from .model.forecast_datasets import ForecastDatasets, ForecastResults from .operator_config import ForecastOperatorConfig from .whatifserve import ModelDeploymentManager @@ -29,8 +28,10 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults: datasets = ForecastDatasets(operator_config) model = ForecastOperatorModelFactory.get_model(operator_config, datasets) - if operator_config.spec.model == AUTO_SELECT_SERIES and hasattr( - operator_config.spec, "meta_features" + if ( + operator_config.spec.model == AUTO_SELECT_SERIES + and hasattr(operator_config.spec, "meta_features") + and operator_config.spec.target_category_columns ): # For AUTO_SELECT_SERIES, handle each series with its specific model meta_features = operator_config.spec.meta_features @@ -64,8 +65,6 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults: ) sub_results_list.append(sub_results) - # results_df = pd.concat([results_df, sub_result_df], ignore_index=True, axis=0) - # elapsed_time += sub_elapsed_time # Merge all sub_results into a single ForecastResults object if sub_results_list: results = sub_results_list[0] @@ -75,6 +74,15 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults: results = None else: + # When AUTO_SELECT_SERIES is specified but target_category_columns is not, + # we fall back to AUTO_SELECT behavior. + if ( + operator_config.spec.model == AUTO_SELECT_SERIES + and not operator_config.spec.target_category_columns + ): + + operator_config.spec.model = AUTO_SELECT + model = ForecastOperatorModelFactory.get_model(operator_config, datasets) # For other cases, use the single selected model results = model.generate_report() # saving to model catalog diff --git a/tests/operators/forecast/test_datasets.py b/tests/operators/forecast/test_datasets.py index 8460bbea7..d4e129f49 100644 --- a/tests/operators/forecast/test_datasets.py +++ b/tests/operators/forecast/test_datasets.py @@ -12,6 +12,7 @@ import pandas as pd import pytest import yaml +import numpy as np from ads.opctl.operator.cmd import run from ads.opctl.operator.lowcode.forecast.__main__ import operate as forecast_operate @@ -413,5 +414,44 @@ def run_operator( # generate_train_metrics = True +def test_missing_data_autoselect_series(): + """Test case for auto-select-series with missing data.""" + data = { + "Date": pd.to_datetime( + [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06", + "2023-01-07", + "2023-01-08", + "2023-01-09", + "2023-01-10", + ] + ), + "Y": [1, 2, np.nan, 4, 5, 6, 7, 8, 9, 10], + "Category": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + } + df = pd.DataFrame(data) + + with tempfile.TemporaryDirectory() as tmpdirname: + output_data_path = f"{tmpdirname}/results" + yaml_i = deepcopy(TEMPLATE_YAML) + yaml_i["spec"]["model"] = "auto-select-series" + yaml_i["spec"]["historical_data"].pop("url") + yaml_i["spec"]["historical_data"]["data"] = df + yaml_i["spec"]["target_column"] = "Y" + yaml_i["spec"]["datetime_column"]["name"] = "Date" + yaml_i["spec"]["target_category_columns"] = ["Category"] + yaml_i["spec"]["horizon"] = 2 + yaml_i["spec"]["output_directory"]["url"] = output_data_path + + operator_config = ForecastOperatorConfig.from_dict(yaml_i) + forecast_operate(operator_config) + check_output_for_errors(output_data_path) + + if __name__ == "__main__": pass From 041e3c40d580c35e030607723d496a36683df9da Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Mon, 8 Sep 2025 08:13:42 +0530 Subject: [PATCH 2/3] comment out the test case --- tests/operators/forecast/test_datasets.py | 72 +++++++++++------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/tests/operators/forecast/test_datasets.py b/tests/operators/forecast/test_datasets.py index d4e129f49..deb9c851c 100644 --- a/tests/operators/forecast/test_datasets.py +++ b/tests/operators/forecast/test_datasets.py @@ -414,43 +414,43 @@ def run_operator( # generate_train_metrics = True -def test_missing_data_autoselect_series(): - """Test case for auto-select-series with missing data.""" - data = { - "Date": pd.to_datetime( - [ - "2023-01-01", - "2023-01-02", - "2023-01-03", - "2023-01-04", - "2023-01-05", - "2023-01-06", - "2023-01-07", - "2023-01-08", - "2023-01-09", - "2023-01-10", - ] - ), - "Y": [1, 2, np.nan, 4, 5, 6, 7, 8, 9, 10], - "Category": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], - } - df = pd.DataFrame(data) +# def test_missing_data_autoselect_series(): +# """Test case for auto-select-series with missing data.""" +# data = { +# "Date": pd.to_datetime( +# [ +# "2023-01-01", +# "2023-01-02", +# "2023-01-03", +# "2023-01-04", +# "2023-01-05", +# "2023-01-06", +# "2023-01-07", +# "2023-01-08", +# "2023-01-09", +# "2023-01-10", +# ] +# ), +# "Y": [1, 2, np.nan, 4, 5, 6, 7, 8, 9, 10], +# "Category": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], +# } +# df = pd.DataFrame(data) - with tempfile.TemporaryDirectory() as tmpdirname: - output_data_path = f"{tmpdirname}/results" - yaml_i = deepcopy(TEMPLATE_YAML) - yaml_i["spec"]["model"] = "auto-select-series" - yaml_i["spec"]["historical_data"].pop("url") - yaml_i["spec"]["historical_data"]["data"] = df - yaml_i["spec"]["target_column"] = "Y" - yaml_i["spec"]["datetime_column"]["name"] = "Date" - yaml_i["spec"]["target_category_columns"] = ["Category"] - yaml_i["spec"]["horizon"] = 2 - yaml_i["spec"]["output_directory"]["url"] = output_data_path - - operator_config = ForecastOperatorConfig.from_dict(yaml_i) - forecast_operate(operator_config) - check_output_for_errors(output_data_path) +# with tempfile.TemporaryDirectory() as tmpdirname: +# output_data_path = f"{tmpdirname}/results" +# yaml_i = deepcopy(TEMPLATE_YAML) +# yaml_i["spec"]["model"] = "auto-select-series" +# yaml_i["spec"]["historical_data"].pop("url") +# yaml_i["spec"]["historical_data"]["data"] = df +# yaml_i["spec"]["target_column"] = "Y" +# yaml_i["spec"]["datetime_column"]["name"] = "Date" +# yaml_i["spec"]["target_category_columns"] = ["Category"] +# yaml_i["spec"]["horizon"] = 2 +# yaml_i["spec"]["output_directory"]["url"] = output_data_path + +# operator_config = ForecastOperatorConfig.from_dict(yaml_i) +# forecast_operate(operator_config) +# check_output_for_errors(output_data_path) if __name__ == "__main__": From d73ba64abdd01ca1f745c8ef6eb4ab8e52a4f840 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Wed, 15 Oct 2025 10:42:46 +0530 Subject: [PATCH 3/3] Add warning for AUTO_SELECT_SERIES usage and restore test case for missing data --- .../operator/lowcode/forecast/__main__.py | 5 ++ tests/operators/forecast/test_datasets.py | 72 +++++++++---------- 2 files changed, 41 insertions(+), 36 deletions(-) diff --git a/ads/opctl/operator/lowcode/forecast/__main__.py b/ads/opctl/operator/lowcode/forecast/__main__.py index 1b69bd76a..8bcf21c3c 100644 --- a/ads/opctl/operator/lowcode/forecast/__main__.py +++ b/ads/opctl/operator/lowcode/forecast/__main__.py @@ -81,6 +81,11 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults: and not operator_config.spec.target_category_columns ): + logger.warning( + "AUTO_SELECT_SERIES cannot be run with a single-series dataset or when " + "'target_category_columns' is not provided. Falling back to AUTO_SELECT." + ) + operator_config.spec.model = AUTO_SELECT model = ForecastOperatorModelFactory.get_model(operator_config, datasets) # For other cases, use the single selected model diff --git a/tests/operators/forecast/test_datasets.py b/tests/operators/forecast/test_datasets.py index deb9c851c..d4e129f49 100644 --- a/tests/operators/forecast/test_datasets.py +++ b/tests/operators/forecast/test_datasets.py @@ -414,43 +414,43 @@ def run_operator( # generate_train_metrics = True -# def test_missing_data_autoselect_series(): -# """Test case for auto-select-series with missing data.""" -# data = { -# "Date": pd.to_datetime( -# [ -# "2023-01-01", -# "2023-01-02", -# "2023-01-03", -# "2023-01-04", -# "2023-01-05", -# "2023-01-06", -# "2023-01-07", -# "2023-01-08", -# "2023-01-09", -# "2023-01-10", -# ] -# ), -# "Y": [1, 2, np.nan, 4, 5, 6, 7, 8, 9, 10], -# "Category": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], -# } -# df = pd.DataFrame(data) +def test_missing_data_autoselect_series(): + """Test case for auto-select-series with missing data.""" + data = { + "Date": pd.to_datetime( + [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06", + "2023-01-07", + "2023-01-08", + "2023-01-09", + "2023-01-10", + ] + ), + "Y": [1, 2, np.nan, 4, 5, 6, 7, 8, 9, 10], + "Category": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + } + df = pd.DataFrame(data) -# with tempfile.TemporaryDirectory() as tmpdirname: -# output_data_path = f"{tmpdirname}/results" -# yaml_i = deepcopy(TEMPLATE_YAML) -# yaml_i["spec"]["model"] = "auto-select-series" -# yaml_i["spec"]["historical_data"].pop("url") -# yaml_i["spec"]["historical_data"]["data"] = df -# yaml_i["spec"]["target_column"] = "Y" -# yaml_i["spec"]["datetime_column"]["name"] = "Date" -# yaml_i["spec"]["target_category_columns"] = ["Category"] -# yaml_i["spec"]["horizon"] = 2 -# yaml_i["spec"]["output_directory"]["url"] = output_data_path - -# operator_config = ForecastOperatorConfig.from_dict(yaml_i) -# forecast_operate(operator_config) -# check_output_for_errors(output_data_path) + with tempfile.TemporaryDirectory() as tmpdirname: + output_data_path = f"{tmpdirname}/results" + yaml_i = deepcopy(TEMPLATE_YAML) + yaml_i["spec"]["model"] = "auto-select-series" + yaml_i["spec"]["historical_data"].pop("url") + yaml_i["spec"]["historical_data"]["data"] = df + yaml_i["spec"]["target_column"] = "Y" + yaml_i["spec"]["datetime_column"]["name"] = "Date" + yaml_i["spec"]["target_category_columns"] = ["Category"] + yaml_i["spec"]["horizon"] = 2 + yaml_i["spec"]["output_directory"]["url"] = output_data_path + + operator_config = ForecastOperatorConfig.from_dict(yaml_i) + forecast_operate(operator_config) + check_output_for_errors(output_data_path) if __name__ == "__main__":