Update with code from sensAI 1.4.0

opcode81 · opcode81 · commit c17aafca85a0 · 2025-01-27T23:39:43.000+01:00
diff --git a/src/sensai/util/deprecation.py b/src/sensai/util/deprecation.py
@@ -15,7 +15,12 @@ def deprecated(message):
     def deprecated_decorator(func):
         @wraps(func)
         def deprecated_func(*args, **kwargs):
-            msg = "{} is a deprecated function. {}".format(func.__name__, message)
+            func_name = func.__name__
+            if func_name == "__init__":
+                class_name = func.__qualname__.split('.')[0]
+                msg = "{} is a deprecated class. {}".format(class_name, message)
+            else:
+                msg = "{} is a deprecated function. {}".format(func_name, message)
             if logging.Logger.root.hasHandlers():
                 log.warning(msg)
             else:
diff --git a/src/sensai/util/git.py b/src/sensai/util/git.py
@@ -0,0 +1,39 @@
+import logging
+from dataclasses import dataclass
+import subprocess
+from typing import Optional
+
+from .string import ToStringMixin
+
+log = logging.getLogger(__name__)
+
+
+@dataclass
+class GitStatus(ToStringMixin):
+    """State of a Git working copy: a commit identifier and one flag per kind of pending change."""
+    commit: str
+    has_unstaged_changes: bool
+    has_staged_uncommitted_changes: bool
+    has_untracked_files: bool
+
+    @property
+    def is_clean(self) -> bool:
+        """Whether there are neither unstaged, nor staged-but-uncommitted, nor untracked changes."""
+        return not any((self.has_unstaged_changes, self.has_staged_uncommitted_changes, self.has_untracked_files))
+
+
+def git_status() -> Optional[GitStatus]:
+    """
+    :return: the Git status of the current working directory, or None (after logging the error) if a git call fails
+    """
+    def has_output(*git_args: str) -> bool:
+        # Test the raw bytes: ASCII-decoding file names raises for non-ASCII paths (e.g. if core.quotePath is disabled)
+        return bool(subprocess.check_output(['git', *git_args]).strip())
+    try:
+        commit_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode('ascii').strip()
+        return GitStatus(commit=commit_hash, has_unstaged_changes=has_output('diff', '--name-only'),
+            has_staged_uncommitted_changes=has_output('diff', '--staged', '--name-only'),
+            has_untracked_files=has_output('ls-files', '--others', '--exclude-standard'))
+    except Exception as e:  # deliberately broad: this is a best-effort query (git or the repository may be missing)
+        log.error("Error determining Git status", exc_info=e)
+        return None
diff --git a/src/sensai/util/helper.py b/src/sensai/util/helper.py
@@ -2,7 +2,7 @@
 This module contains various helper functions.
 """
 import math
-from typing import Any, Sequence, Union, TypeVar, List, Optional, Dict, Container, Iterable
+from typing import Any, Sequence, Union, TypeVar, List, Optional, Dict, Container, Iterable, Tuple
 
 T = TypeVar("T")
 
@@ -110,3 +110,24 @@ def kwarg_if_not_none(arg_name: str, arg_value: Any) -> Dict[str, Any]:
         return {}
     else:
         return {arg_name: arg_value}
+
+
+def flatten_dict(d: Dict[str, Any], sep: str = '.') -> Dict[str, Any]:
+    """
+    Turns a nested dictionary into a single-level one, where the key of each non-dict value
+    is the chain of keys leading to it, joined by the separator.
+    Nested dictionaries that are empty contribute no entries.
+
+    :param d: the (possibly nested) dictionary
+    :param sep: the string placed between the keys of successive nesting levels
+    :return: the flattened dictionary
+    """
+    def iter_leaves(mapping: Dict[str, Any], prefix: str) -> Iterable[Tuple[str, Any]]:
+        for key, value in mapping.items():
+            path = f"{prefix}{sep}{key}" if prefix else key
+            if isinstance(value, dict):
+                yield from iter_leaves(value, path)
+            else:
+                yield path, value
+
+    return dict(iter_leaves(d, ''))
diff --git a/src/sensai/util/plot.py b/src/sensai/util/plot.py
@@ -181,6 +181,9 @@ def save(self, path):
         log.info(f"Saving figure in {path}")
         self.fig.savefig(path)
 
+    def show(self) -> None:
+        self.fig.show()  # displaying is delegated entirely to the `show` method of the held figure object
+
     def xtick(self: TPlot, major=None, minor=None) -> TPlot:
         """
         Sets a tick on every integer multiple of the given base values.
diff --git a/src/sensai/util/string.py b/src/sensai/util/string.py
@@ -84,6 +84,8 @@ def to_string(x, converter: StringConverter = None, apply_converter_to_non_compl
             return list_string(x, brackets="()", converter=converter)
         elif type(x) == dict:
             return dict_string(x, brackets="{}", converter=converter)
+        elif type(x) == str:
+            return repr(x)
         elif type(x) == types.MethodType:
             # could be bound method of a ToStringMixin instance (which would print the repr of the instance, which can potentially cause
             # an infinite recursion)
@@ -426,7 +428,7 @@ def take(cnt=1):
     def find_matching(j):
         start = j
         op = s[j]
-        cl = {"[": "]", "(": ")", "'": "'"}[s[j]]
+        cl = {"[": "]", "(": ")", "'": "'", "{": "}"}[s[j]]
         is_bracket = cl != s[j]
         stack = 0
         while j < len(s):
@@ -439,7 +441,7 @@ def find_matching(j):
             j += 1
         return None
 
-    brackets = "[("
+    brackets = "[({"
     quotes = "'"
     while i < len(s):
         is_bracket = s[i] in brackets
@@ -457,7 +459,7 @@ def find_matching(j):
                 take(1)
                 indent += 1
                 nl()
-        elif s[i] in "])":
+        elif s[i] in "])}":
             take(1)
             indent -= 1
         elif s[i:i+2] == ", ":
diff --git a/src/sensai/util/tensorboard.py b/src/sensai/util/tensorboard.py
@@ -0,0 +1,106 @@
+import numpy as np
+import pandas as pd
+from tensorboard.backend.event_processing import event_accumulator
+
+from matplotlib import pyplot as plt
+
+from .pandas import SeriesInterpolationLinearIndex
+
+
+class TensorboardData:
+    """Provides access to the scalar series contained in tensorboard events as pandas objects."""
+    def __init__(self, events: event_accumulator.EventAccumulator):
+        """:param events: the event accumulator to read from; its `Reload` method is called upon construction"""
+        self.events = events
+        self.events.Reload()
+
+    def get_series(self, tag: str, smoothing_factor: float = 0.0) -> pd.Series:
+        """
+        Gets the (smoothed) pandas Series for a specific tensorboard tag.
+
+        :param tag: the tensorboard tag
+        :param smoothing_factor: the smoothing factor between 0 and 1 which determines the relative importance of past values.
+            0: no smoothing
+            1: maximum smoothing (all values will be equal to the first value)
+        :return: the pandas series with the step as the index (empty if there are no events for the tag)
+        :raises ValueError: if the smoothing factor is not between 0 and 1
+        :raises KeyError: if the tag is not found in the events
+        """
+        if not 0 <= smoothing_factor <= 1:
+            raise ValueError("Smoothing factor must be between 0 and 1")
+
+        try:
+            scalar_events = self.events.Scalars(tag)
+        except KeyError as e:
+            raise KeyError(f"Tag '{tag}' not found in tensorboard events") from e
+
+        steps = [event.step for event in scalar_events]
+        values = [event.value for event in scalar_events]
+
+        # the emptiness check guards the seed lookup values[0], which would otherwise raise an IndexError
+        if smoothing_factor > 0 and values:
+            smoothed_values = []
+            last = values[0]
+            for value in values:
+                last = smoothing_factor * last + (1 - smoothing_factor) * value
+                smoothed_values.append(last)
+            values = smoothed_values
+
+        return pd.Series(values, index=steps, name=tag)
+
+    def get_tags(self) -> list[str]:
+        """
+        Get list of available scalar tags in the events.
+
+        :return: list of tag names
+        """
+        return self.events.Tags()['scalars']
+
+    def get_data_frame(self, tags: list[str] | None = None, smoothing_factor: float = 0.0) -> pd.DataFrame:
+        """
+        Gets multiple series as a DataFrame.
+
+        :param tags: the list of tensorboard tags to consider; if None, use all
+        :param smoothing_factor: smoothing factor to apply to all series
+        :return: DataFrame with steps as index and tags as columns
+        """
+        if tags is None:
+            tags = self.get_tags()
+        return pd.DataFrame({tag: self.get_series(tag, smoothing_factor) for tag in tags})
+
+
+class TensorboardSeriesComparison:
+    """Juxtaposes one scalar series taken from reference data with the same series taken from current data."""
+    def __init__(self, tb_reference: TensorboardData, tb_current: TensorboardData,
+            tag: str, index_start: int, index_end: int):
+        """
+        :param tb_reference: the data containing the reference series
+        :param tb_current: the data containing the series to compare against the reference
+        :param tag: the tag of the series to retrieve from both data objects
+        :param index_start: the index label at which the compared range starts (used as the lower `.loc` slice bound)
+        :param index_end: the index label at which the compared range ends (used as the upper `.loc` slice bound)
+        """
+        series_pair = [tb_reference.get_series(tag), tb_current.get_series(tag)]
+        interpolation = SeriesInterpolationLinearIndex(ffill=True, bfill=True)
+        aligned_ref, aligned_cur = interpolation.interpolate_all_with_combined_index(series_pair)
+        self.s_ref, self.s_cur = (s.loc[index_start:index_end] for s in (aligned_ref, aligned_cur))
+
+    def mean_relative_difference(self):
+        """
+        Computes the mean of `(current - reference) / |reference|` over the compared range, e.g. 0.05 if the
+        current series is on average 105% of the reference series (5% relative difference).
+        Dividing by the absolute value of the reference keeps the sign meaningful for negative values: for a
+        reference value of -0.10 and a current value of -0.08, the relative difference is 0.2 (20%).
+        :return: the mean relative difference
+        """
+        return np.mean((self.s_cur - self.s_ref) / abs(self.s_ref))
+
+    def plot_series(self, show=False) -> plt.Figure:
+        """Plots both series into a newly created figure, which is returned (and displayed first if `show` is set)."""
+        fig = plt.figure()
+        for series in (self.s_ref, self.s_cur):
+            series.plot()
+        plt.title(self.s_ref.name)
+        if show:
+            plt.show()
+        return fig