diff --git a/pyproject.toml b/pyproject.toml index 79601ec..99dc7a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ 'pandas>=1.0.0,<2', 'scikit-learn>=0.22.1,<2', 'orion-ml>=0.6,<0.8', + 'ml-stars>=0.2.1,<0.4', 'openai>=1.46.0', 'tiktoken>=0.7.0', 'transformers>=4.44.2', diff --git a/sigllm/core.py b/sigllm/core.py index 0008002..5b621b0 100644 --- a/sigllm/core.py +++ b/sigllm/core.py @@ -16,7 +16,7 @@ INTERVAL_PRIMITIVE = 'mlstars.custom.timeseries_preprocessing.time_segments_aggregate#1' DECIMAL_PRIMITIVE = 'sigllm.primitives.transformation.Float2Scalar#1' -WINDOW_SIZE_PRIMITIVE = 'sigllm.primitives.forecasting.custom.rolling_window_sequences#1' +WINDOW_SIZE_PRIMITIVE = 'mlstars.custom.timeseries_preprocessing.rolling_window_sequences#1' class SigLLM(Orion): diff --git a/sigllm/pipelines/detector/gpt_detector.json b/sigllm/pipelines/detector/gpt_detector.json index d763635..78390b5 100644 --- a/sigllm/pipelines/detector/gpt_detector.json +++ b/sigllm/pipelines/detector/gpt_detector.json @@ -3,7 +3,7 @@ "mlstars.custom.timeseries_preprocessing.time_segments_aggregate", "sklearn.impute.SimpleImputer", "sigllm.primitives.transformation.Float2Scalar", - "sigllm.primitives.forecasting.custom.rolling_window_sequences", + "mlstars.custom.timeseries_preprocessing.rolling_window_sequences", "sigllm.primitives.transformation.format_as_string", "sigllm.primitives.forecasting.gpt.GPT", "sigllm.primitives.transformation.format_as_integer", @@ -23,7 +23,7 @@ "decimal": 2, "rescale": true }, - "sigllm.primitives.forecasting.custom.rolling_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.rolling_window_sequences#1": { "target_column": 0, "window_size": 140, "target_size": 1 diff --git a/sigllm/pipelines/detector/mistral_detector.json b/sigllm/pipelines/detector/mistral_detector.json index ecd7d3e..5200762 100644 --- a/sigllm/pipelines/detector/mistral_detector.json +++ b/sigllm/pipelines/detector/mistral_detector.json @@ -3,7 +3,7 @@ "mlstars.custom.timeseries_preprocessing.time_segments_aggregate", "sklearn.impute.SimpleImputer", "sigllm.primitives.transformation.Float2Scalar", - "sigllm.primitives.forecasting.custom.rolling_window_sequences", + "mlstars.custom.timeseries_preprocessing.rolling_window_sequences", "sigllm.primitives.transformation.format_as_string", "sigllm.primitives.forecasting.huggingface.HF", "sigllm.primitives.transformation.format_as_integer", @@ -23,7 +23,7 @@ "decimal": 2, "rescale": true }, - "sigllm.primitives.forecasting.custom.rolling_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.rolling_window_sequences#1": { "target_column": 0, "window_size": 140, "target_size": 1 diff --git a/sigllm/primitives/forecasting/custom.py b/sigllm/primitives/forecasting/custom.py deleted file mode 100644 index 2d9f29b..0000000 --- a/sigllm/primitives/forecasting/custom.py +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np - - -def rolling_window_sequences(X, y, index, window_size, target_size, step_size, target_column): - """Create rolling window sequences out of time series data. - - The function creates an array of input sequences and an array of target sequences by rolling - over the input sequence with a specified window. - Optionally, certain values can be dropped from the sequences. - - Args: - X (ndarray): - N-dimensional input sequence to iterate over. - y (ndarray): - N-dimensional target sequence to iterate over. - index (ndarray): - Array containing the index values of X. - window_size (int): - Length of the input sequences. - target_size (int): - Length of the target sequences. - step_size (int): - Indicating the number of steps to move the window forward each round. - target_column (int): - Indicating which column of X is the target. - - Returns: - ndarray, ndarray, ndarray, ndarray: - * input sequences. - * target sequences. - * first index value of each input sequence. - * first index value of each target sequence. - """ - out_X = list() - out_y = list() - X_index = list() - y_index = list() - target = y[:, target_column] - - start = 0 - max_start = len(X) - window_size - target_size + 1 - while start < max_start: - end = start + window_size - - out_X.append(X[start:end]) - out_y.append(target[end : end + target_size]) - X_index.append(index[start]) - y_index.append(index[end]) - start = start + step_size - - return np.asarray(out_X), np.asarray(out_y), np.asarray(X_index), np.asarray(y_index) diff --git a/sigllm/primitives/jsons/sigllm.primitives.forecasting.custom.rolling_window_sequences.json b/sigllm/primitives/jsons/sigllm.primitives.forecasting.custom.rolling_window_sequences.json deleted file mode 100644 index 9cdb8c1..0000000 --- a/sigllm/primitives/jsons/sigllm.primitives.forecasting.custom.rolling_window_sequences.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "name": "sigllm.primitives.forecasting.custom.rolling_window_sequences", - "contributors": [ - "Sarah Alnegheimish " - ], - "description": "Create rolling window sequences out of timeseries data.", - "classifiers": { - "type": "preprocessor", - "subtype": "feature_extractor" - }, - "modalities": [ - "timeseries" - ], - "primitive": "sigllm.primitives.forecasting.custom.rolling_window_sequences", - "produce": { - "args": [ - { - "name": "X", - "type": "ndarray" - }, - { - "name": "y", - "type": "ndarray" - }, - { - "name": "index", - "type": "ndarray" - } - ], - "output": [ - { - "name": "X", - "type": "ndarray" - }, - { - "name": "y", - "type": "ndarray" - }, - { - "name": "index", - "type": "ndarray" - }, - { - "name": "target_index", - "type": "ndarray" - } - ] - }, - "hyperparameters": { - "fixed": { - "window_size": { - "type": "int", - "default": 250 - }, - "target_size": { - "type": "int", - "default": 1 - }, - "step_size": { - "type": "int", - "default": 1 - }, - "target_column": { - "type": "str or int", - "default": 1 - } - } - } -} diff --git a/tutorials/pipelines/detector-pipeline.ipynb b/tutorials/pipelines/detector-pipeline.ipynb index 60fa97a..2504bcb 100644 --- a/tutorials/pipelines/detector-pipeline.ipynb +++ b/tutorials/pipelines/detector-pipeline.ipynb @@ -190,7 +190,7 @@ "['mlstars.custom.timeseries_preprocessing.time_segments_aggregate',\n", " 'sklearn.impute.SimpleImputer',\n", " 'sigllm.primitives.transformation.Float2Scalar',\n", - " 'sigllm.primitives.forecasting.custom.rolling_window_sequences',\n", + " 'mlstars.custom.timeseries_preprocessing.rolling_window_sequences',\n", " 'sigllm.primitives.transformation.format_as_string',\n", " 'sigllm.primitives.forecasting.huggingface.HF',\n", " 'sigllm.primitives.transformation.format_as_integer',\n",