cschlaffner · lucatreide · Jun 12, 2024 · Jun 13, 2024 · Jun 17, 2024 · Jun 17, 2024
diff --git a/protzilla/data_analysis/ptm_quantification/__init__.py b/protzilla/data_analysis/ptm_quantification/__init__.py
diff --git a/...zilla/data_analysis/ptm_quantification.py → ...analysis/ptm_quantification/flexiquant.py b/...zilla/data_analysis/ptm_quantification.py → ...analysis/ptm_quantification/flexiquant.py
diff --git a/protzilla/data_analysis/ptm_quantification/multiflex.py b/protzilla/data_analysis/ptm_quantification/multiflex.py
diff --git a/protzilla/importing/peptide_import.py b/protzilla/importing/peptide_import.py
@@ -65,7 +65,7 @@ def peptide_import(file_path, intensity_name, map_to_uniprot) -> dict:
     )
     cleaned = ordered.assign(**{"Protein ID": new_groups})
 
-    return dict(peptide_df=cleaned)
+    return dict(peptide_df=ordered)
 
 
 def evidence_import(file_path, intensity_name, map_to_uniprot) -> dict:

diff --git a/protzilla/methods/data_analysis.py b/protzilla/methods/data_analysis.py
@@ -27,7 +27,8 @@
     ptms_per_protein_and_sample,
     ptms_per_sample,
 )
-from protzilla.data_analysis.ptm_quantification import flexiquant_lf
+from protzilla.data_analysis.ptm_quantification.flexiquant import flexiquant_lf
+from protzilla.data_analysis.ptm_quantification.multiflex import multiflex_lf
 from protzilla.methods.data_preprocessing import TransformationLog
 from protzilla.steps import Plots, Step, StepManager
 
@@ -705,10 +706,10 @@ class FLEXIQuantLF(PlotStep):
         "peptide_df",
         "metadata_df",
         "reference_group",
+        "grouping_column",
         "protein_id",
         "num_init",
         "mod_cutoff",
-        "grouping_column",
     ]
     output_keys = [
         "raw_scores",
@@ -726,6 +727,44 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict:
         )
 
         inputs["metadata_df"] = steps.metadata_df
+        return inputs
+
+
+class MultiFLEXLF(PlotStep):
+    # Data-analysis step that runs MultiFLEX-LF by delegating to multiflex_lf().
+    # NOTE(review): MultiFLEXLFForm (ui/runs/forms/data_analysis.py) declares a
+    # `remove_outliers` field that is not listed in input_keys below; confirm
+    # whether that option is meant to reach multiflex_lf().
+    display_name = "MultiFLEX-LF"
+    operation = "modification_quantification"
+    method_description = "Quantifies the extent of protein modifications in proteomics data by using robust linear regression to compare modified and unmodified peptide precursors and facilitates the analysis of modification dynamics and coregulated modifications across large datasets without the need for preselecting specific proteins."
+
+    # method() unpacks the inputs dict into multiflex_lf() as keyword arguments.
+    # Presumably these are the keys the framework collects into that dict, so
+    # they would have to match multiflex_lf's parameter names; TODO confirm.
+    input_keys = [
+        "peptide_df",
+        "metadata_df",
+        "reference_group",
+        "num_init",
+        "mod_cutoff",
+        "imputation_cosine_similarity",
+        "deseq2_normalization",
+        "colormap",
+    ]
+
+    # Presumably the keys of the dict returned by multiflex_lf(); verify
+    # against its implementation.
+    output_keys = [
+        "RM_scores_clustered",
+        "diff_modified",
+        "raw_scores",
+        "removed_peptides",
+        "RM_scores",
+    ]
+
+    # Run the analysis: forward every entry of `inputs` as a keyword argument
+    # to multiflex_lf() and return its result unchanged.
+    def method(self, inputs: dict) -> dict:
+        return multiflex_lf(**inputs)
+
+    # Prepare `inputs` before the method runs. The dict is modified in place
+    # and the same dict is returned.
+    def insert_dataframes(self, steps: StepManager, inputs) -> dict:
+        # Replace the submitted "peptide_df" value with whatever
+        # steps.get_step_output() returns for it.
+        inputs["peptide_df"] = steps.get_step_output(
+            Step, "peptide_df", inputs["peptide_df"]
+        )
+
+        inputs["metadata_df"] = steps.metadata_df
+        # The form offers colormap choices keyed 1-8; the submitted value
+        # presumably arrives as a string, hence the int() cast. Confirm.
+        inputs["colormap"] = int(inputs["colormap"])
+        return inputs
 
 
 class SelectPeptidesForProtein(DataAnalysisStep):

diff --git a/requirements.txt b/requirements.txt
@@ -19,7 +19,7 @@ pytest-django==4.5.2
 pytest-order==1.1.0
 restring==0.1.20
 scikit-learn==1.2.2
-scipy==1.10.1
+scipy==1.11.0
 statsmodels==0.13.5
 umap-learn==0.5.3
 Werkzeug==2.2.3
@@ -36,3 +36,6 @@ beautifulsoup4==4.12.2
 sphinx==7.2.6
 sphinx-autoapi==3.0.0
 openpyxl==3.1.2
+pydeseq2==0.4.9
+seaborn==0.13.0
+matplotlib==3.8.0
diff --git a/ui/runs/form_mapping.py b/ui/runs/form_mapping.py
@@ -63,6 +63,7 @@
     data_analysis.ProteinGraphVariationGraph: data_analysis_forms.ProteinGraphVariationGraphForm,
     data_analysis.SelectPeptidesForProtein: data_analysis_forms.SelectPeptidesForProteinForm,
     data_analysis.FLEXIQuantLF: data_analysis_forms.FLEXIQuantLFForm,
+    data_analysis.MultiFLEXLF: data_analysis_forms.MultiFLEXLFForm,
     data_analysis.PTMsPerSample: data_analysis_forms.PTMsPerSampleForm,
     data_analysis.PTMsProteinAndPerSample: data_analysis_forms.PTMsPerProteinAndSampleForm,
     data_preprocessing.ImputationByMinPerSample: data_preprocessing_forms.ImputationByMinPerSampleForms,

diff --git a/ui/runs/forms/data_analysis.py b/ui/runs/forms/data_analysis.py
@@ -1007,7 +1007,7 @@ class FLEXIQuantLFForm(MethodForm):
     def fill_form(self, run: Run) -> None:
         self.fields["peptide_df"].choices = fill_helper.get_choices(run, "peptide_df")
         self.fields["grouping_column"].choices = fill_helper.to_choices(
-            run.steps.metadata_df.drop("Sample", axis=1).columns[1:]
+            run.steps.metadata_df.drop("Sample", axis=1).columns
         )
 
         chosen_grouping_column = self.data.get(
@@ -1028,6 +1028,54 @@ def fill_form(self, run: Run) -> None:
         )
 
 
+class MultiFLEXLFForm(MethodForm):
+    # Input form for the MultiFLEX-LF step; ui/runs/form_mapping.py maps
+    # data_analysis.MultiFLEXLF to this class.
+
+    # Both choice lists are declared empty and populated in fill_form().
+    peptide_df = CustomChoiceField(label="Peptide dataframe", choices=[])
+    reference_group = CustomChoiceField(label="Reference group", choices=[])
+    num_init = CustomNumberField(
+        label="Number of RANSAC initiations",
+        initial=30,
+        min_value=1,
+        max_value=60,
+        step_size=1,
+    )
+    mod_cutoff = CustomFloatField(
+        label="Modification cutoff", initial=0.5, min_value=0, max_value=1
+    )
+    # NOTE(review): unlike mod_cutoff, no min_value/max_value is set here;
+    # confirm whether the accepted range should be bounded.
+    imputation_cosine_similarity = CustomFloatField(
+        label="Minimal cosine similarity value for missing value imputation of RM scores for clustering",
+        initial=0.98,
+    )
+    # Choices are keyed by the integers 1-8; MultiFLEXLF.insert_dataframes
+    # converts the selected value with int() before the method is called.
+    colormap = CustomChoiceField(
+        label="Color map used for heatmap",
+        choices=[
+            (1, "red-white-blue"),
+            (2, "pink-white-green"),
+            (3, "purple-white-green"),
+            (4, "brown-white-bluegreen"),
+            (5, "orange-white-purple"),
+            (6, "red-white-grey"),
+            (7, "red-yellow-green"),
+            (8, "red-yellow-blue"),
+        ],
+    )
+    # NOTE(review): "remove_outliers" is not listed in MultiFLEXLF.input_keys
+    # (protzilla/methods/data_analysis.py); confirm that this checkbox
+    # actually reaches multiflex_lf().
+    remove_outliers = CustomBooleanField(
+        label="Remove peptides with outlier intensities from RM score calculation",
+        required=False,
+        initial=True,
+    )
+    deseq2_normalization = CustomBooleanField(
+        label="Apply DESeq2 normalization to RM scores before clustering",
+        required=False,
+        initial=True,
+    )
+
+    # Populate the two dynamic choice lists from the given run.
+    def fill_form(self, run: Run) -> None:
+        self.fields["peptide_df"].choices = fill_helper.get_choices(run, "peptide_df")
+        # Reference-group options are the unique values of the metadata
+        # "Group" column. NOTE(review): the column name is hard-coded, whereas
+        # FLEXIQuantLFForm lets the user pick a grouping column; confirm the
+        # metadata always contains "Group".
+        self.fields["reference_group"].choices = fill_helper.to_choices(
+            run.steps.metadata_df["Group"].unique()
+        )
+
+
 class SelectPeptidesForProteinForm(MethodForm):
     is_dynamic = True