cschlaffner · RogerAK · Jun 21, 2024 · Jul 2, 2024 · Jul 2, 2024 · Jul 10, 2024
diff --git a/protzilla/constants/colors.py b/protzilla/constants/colors.py
@@ -1,8 +1,44 @@
 PROTZILLA_DISCRETE_COLOR_SEQUENCE = [
-    "#4A536A",
-    "#87A8B9",
-    "#CE5A5A",
-    "#8E3325",
-    "#E2A46D",
+    # Set 1 (indices 0-4): Muted Dark Slate
+    "#252935", "#3A3F50", "#50556A", "#6B7186", "#858DA2",
+    # Set 2 (indices 5-9): Muted Indian Red
+    "#CE5A5A", "#B24C4C", "#9D3F3F", "#E07272", "#F48D8D",
+    # Set 3 (indices 10-14): Muted Light Steel Blue
+    "#51646F", "#6A7D89", "#7F92A0", "#96A9B8", "#ADBFCD",
+    # Set 4 (indices 15-19): Muted Sienna
+    "#804538", "#6F3C31", "#5F342A", "#A05748", "#B66E5E",
+    # Set 5 (indices 20-24): Muted Sandy Brown
+    "#715236", "#63472F", "#57402B", "#96755A", "#A98575",
+    # Set 6 (indices 25-29): Muted Olive
+    "#6E6B48", "#5D5B3E", "#4E4D36", "#89875C", "#A1A16E",
+    # Set 7 (indices 30-34): Muted Teal
+    "#3B6B6A", "#315B5B", "#274C4C", "#507E7E", "#6B9898",
+    # Set 8 (indices 35-39): Muted Taupe
+    "#8B7E74", "#776F65", "#675E56", "#A09085", "#B9AAA1",
+    # Set 9 (indices 40-44): Muted Burgundy
+    "#7B3A4F", "#6A3345", "#582C3C", "#925664", "#A8737E",
+    # Set 10 (indices 45-49): Muted Forest Green
+    "#3D5047", "#35453E", "#2D3B35", "#5F7267", "#7B8D80",
+    # Set 11 (indices 50-54): Muted Navy
+    "#2F3E4C", "#283442", "#222B38", "#485669", "#627185",
+    # Set 12 (indices 55-59): Muted Mustard
+    "#BFA054", "#A98F4A", "#927D3F", "#D7BA75", "#E2CD96",
+    # Set 13 (indices 60-64): Muted Dusty Rose
+    "#C18394", "#AA727E", "#93616C", "#D69BA7", "#E4B8C2",
+    # Set 14 (indices 65-69): Muted Lavender
+    "#8A729D", "#7A638C", "#6A547C", "#A591B3", "#BDA9C8",
+    # Set 15 (indices 70-74): Muted Charcoal
+    "#404040", "#353535", "#2B2B2B", "#585858", "#707070",
+    # Set 16 (indices 75-79): Muted Emerald Green
+    "#4D7456", "#426448", "#37563B", "#6A9177", "#85A990",
+    # Set 17 (indices 80-84): Muted Peach
+    "#D89B83", "#C2866F", "#A7725E", "#E3B39C", "#ECC7B6",
+    # Set 18 (indices 85-89): Muted Plum
+    "#704F6E", "#634464", "#563A59", "#876A87", "#A18AA1",
+    # Set 19 (indices 90-94): Muted Periwinkle
+    "#7E8DAF", "#6F7B98", "#616A82", "#97A3BF", "#B0B9D1",
+    # Set 20 (indices 95-99): Muted Coral
+    "#CC7A5E", "#B26951", "#9A5A45", "#DD937C", "#EBAA99"
 ]
+
 PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE = ["#4A536A", "#CE5A5A"]
diff --git a/protzilla/data_analysis/time_series_helper.py b/protzilla/data_analysis/time_series_helper.py
@@ -0,0 +1,15 @@
+from datetime import datetime
+
+def convert_time_to_hours(time_str):
+    """
+    Convert a string time to the number of hours since midnight.
+
+    :param time_str: The time string to convert in format '%H:%M:%S'
+
+    :return: Number of hours since midnight as a float
+    :raises ValueError: if time_str does not match the format '%H:%M:%S'
+    """
+    time_obj = datetime.strptime(time_str, "%H:%M:%S")
+    # Minutes and seconds are folded into the fractional part of the hour value.
+    hours_since_midnight = time_obj.hour + time_obj.minute / 60 + time_obj.second / 3600
+    return hours_since_midnight
diff --git a/protzilla/data_analysis/time_series_plots.py b/protzilla/data_analysis/time_series_plots.py
@@ -0,0 +1,179 @@
+import pandas as pd
+import plotly.graph_objects as go
+from scipy import stats
+from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
+
+from protzilla.utilities.transform_dfs import is_long_format, long_to_wide_time
+from protzilla.constants.colors import PROTZILLA_DISCRETE_COLOR_SEQUENCE
+
+# Layout colors for the figure built by time_quant_plot
+colors = {
+    "plot_bgcolor": "white",  # plot background
+    "gridcolor": "#F1F1F1",  # grid lines of the x and y axis
+    "linecolor": "#F1F1F1",  # axis lines of the x and y axis
+    "annotation_text_color": "#ffffff",  # not referenced in this module
+    "annotation_proteins_of_interest": "#4A536A",  # not referenced in this module
+}
+
+def time_quant_plot(
+    intensity_df: pd.DataFrame,
+    metadata_df: pd.DataFrame,
+    time_column: str,
+    protein_group: str,
+    similarity: float = 1.0,
+    similarity_measure: str = "euclidean distance",
+) -> dict:
+    """
+    Create a line diagram of protein group intensities over time.
+
+    The selected protein group is drawn as its own line, all protein groups with a
+    similar z-scored intensity profile as further lines, and the minimum/maximum of
+    each row of the wide dataframe as a filled silver polygon in the background.
+
+    :param intensity_df: A dataframe with the intensities and a "Sample" column; it
+        is converted to wide format if it is in long format.
+    :param metadata_df: A dataframe containing the metadata of the samples.
+    :param time_column: The name of the column in the metadata_df that contains the time information.
+    :param protein_group: Protein IDs as the columnheader of the dataframe
+    :param similarity: threshold of the similarity measure: groups are shown if their
+        euclidean distance is <= similarity or their cosine similarity is >= similarity.
+    :param similarity_measure: "euclidean distance" or "cosine similarity"; any other
+        value is handled like "cosine similarity" when comparing protein groups.
+
+    :return: a dictionary whose key "plots" holds a list with the plotly figure
+    :raises ValueError: if protein_group is not a column of the wide dataframe or if
+        similarity is outside the valid range of the chosen similarity measure
+    """
+    intensity_df = pd.merge(
+        left=intensity_df,
+        right=metadata_df[["Sample", time_column]],
+        on="Sample",
+        copy=False,
+    )
+
+    # NOTE(review): interpolation runs before the long-to-wide conversion - confirm.
+    wide_df = intensity_df.interpolate(method="linear", axis=0)
+    if is_long_format(wide_df, time_column=time_column):
+        wide_df = long_to_wide_time(wide_df, time_column=time_column)
+
+    if protein_group not in wide_df.columns:
+        raise ValueError("Please select a valid protein group.")
+    elif similarity_measure == "euclidean distance" and similarity < 0:
+        raise ValueError(
+            "Similarity for euclidean distance should be greater than or equal to 0."
+        )
+    elif similarity_measure == "cosine similarity" and (
+        similarity < -1 or similarity > 1
+    ):
+        raise ValueError("Similarity for cosine similarity should be between -1 and 1")
+
+    def shorten(name: str) -> str:
+        # Keep legend entries and the title readable for long protein group names.
+        return name[:15] + "..." if len(name) > 15 else name
+
+    use_euclidean = similarity_measure == "euclidean distance"
+    similar_color = PROTZILLA_DISCRETE_COLOR_SEQUENCE[9]
+    fig = go.Figure()
+
+    # Outline of the intensity range as one closed polygon: start at the minimum of
+    # the first row, follow the row maxima forwards and the row minima backwards.
+    lower_upper_x = [wide_df.index[0]]
+    lower_upper_y = [wide_df.iloc[0].min()]
+    row_minima = []
+    for index, row in wide_df.iterrows():
+        lower_upper_x.append(index)
+        lower_upper_y.append(row.max())
+        row_minima.append((index, row.min()))
+    for index, row_minimum in reversed(row_minima):
+        lower_upper_x.append(index)
+        lower_upper_y.append(row_minimum)
+
+    fig.add_trace(
+        go.Scatter(
+            x=lower_upper_x,
+            y=lower_upper_y,
+            fill="toself",
+            name="Intensity Range",
+            line=dict(color="silver"),
+        )
+    )
+
+    # The reference profile is the same for every comparison, so it is z-scored once.
+    # Plain ndarrays are passed to zscore so that its result can be reshaped directly.
+    reference = stats.zscore(wide_df[protein_group].to_numpy()).reshape(1, -1)
+    similar_groups = []
+    for group_to_compare in wide_df.columns:
+        if group_to_compare == protein_group:
+            continue
+        candidate = stats.zscore(wide_df[group_to_compare].to_numpy()).reshape(1, -1)
+        if use_euclidean:
+            distance = euclidean_distances(reference, candidate)[0][0]
+            is_similar = distance <= similarity
+        else:
+            score = cosine_similarity(reference, candidate)[0][0]
+            is_similar = score >= similarity
+        if is_similar:
+            similar_groups.append(group_to_compare)
+
+    # Above 7 similar groups one shared legend entry replaces the individual ones.
+    show_group_legends = len(similar_groups) <= 7
+    for group in similar_groups:
+        fig.add_trace(
+            go.Scatter(
+                x=wide_df.index,
+                y=wide_df[group],
+                mode="lines",
+                name=shorten(group),
+                line=dict(color=similar_color),
+                showlegend=show_group_legends,
+            )
+        )
+
+    if not show_group_legends:
+        fig.add_trace(
+            go.Scatter(
+                x=[None],
+                y=[None],
+                mode="lines",
+                line=dict(color=similar_color),
+                name="Similar Protein Groups",
+            )
+        )
+
+    formatted_protein_name = shorten(protein_group)
+    fig.add_trace(
+        go.Scatter(
+            x=wide_df.index,
+            y=wide_df[protein_group],
+            mode="lines",
+            name=formatted_protein_name,
+            line=dict(color=PROTZILLA_DISCRETE_COLOR_SEQUENCE[4]),
+        )
+    )
+    fig.update_layout(
+        title=f"Time Series of {formatted_protein_name} in all samples",
+        plot_bgcolor=colors["plot_bgcolor"],
+        xaxis_gridcolor=colors["gridcolor"],
+        yaxis_gridcolor=colors["gridcolor"],
+        xaxis_linecolor=colors["linecolor"],
+        yaxis_linecolor=colors["linecolor"],
+        xaxis_title=time_column,
+        yaxis_title="Intensity",
+        legend_title="Legend",
+        xaxis=dict(
+            tickmode="array",
+            tickangle=0,
+            tickvals=wide_df.index,
+            ticktext=[str(time_point) for time_point in wide_df.index],
+        ),
+        autosize=True,
+        margin=dict(l=100, r=300, t=100, b=100),
+        legend=dict(
+            x=1.05,
+            y=1,
+            bgcolor="rgba(255, 255, 255, 0.5)",
+            orientation="v",
+        ),
+    )
+
+    return dict(plots=[fig])