Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions protzilla/importing/metadata_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,27 +122,34 @@ def metadata_import_method_diann(
return dict(
messages=[dict(level=logging.ERROR, msg=msg)],
)
meta_df.rename(columns={"sample name": "Sample"}, inplace=True)

if file_path.startswith(
f"{PROJECT_PATH}/tests/protzilla/importing/conversion_tmp_"
):
os.remove(file_path)

if groupby_sample:
# we want to take the median of all MS runs (column "Sample" in the intensity df) for each Sample
# (column "sample name" in the metadata df)
# we want to take the median of all MS runs (column "Sample" in the protein df) for each Sample
# (column "Sample" in the metadata df)
protein_df.rename(columns={"Sample": "MS run"}, inplace=True)
protein_df = pd.merge(
protein_df,
meta_df[["MS run", "sample name"]],
left_on="Sample",
meta_df[["MS run", "Sample"]],
left_on="MS run",
right_on="MS run",
how="left",
)
protein_df = protein_df.groupby(
["Protein ID", "sample name"], as_index=False
).median()
protein_df.rename(columns={"sample name": "Sample"}, inplace=True)
protein_df = (
protein_df.groupby(["Protein ID", "Sample"])["Intensity"]
.median()
.reset_index()
)
return dict(protein_df=protein_df, metadata_df=meta_df)
else:
meta_df.rename(
columns={"MS run": "Sample", "Sample": "Sample Group"}, inplace=True
)

return dict(protein_df=protein_df, metadata_df=meta_df)

Expand Down
3 changes: 3 additions & 0 deletions protzilla/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import traceback

import protzilla.constants.paths as paths
from protzilla.constants.protzilla_logging import logger
from protzilla.steps import Messages, Output, Plots, Step
from protzilla.utilities import format_trace

Expand All @@ -31,6 +32,8 @@ def __enter__(self):
def __exit__(self, exc_type, exc_value, tb):
if exc_type:
formatted_trace = format_trace(traceback.format_exception(exc_value))
# print the traceback to the console
logger.exception(exc_value)
if (
hasattr(self.run, "current_step")
and self.run.current_step is not None
Expand Down
5 changes: 4 additions & 1 deletion protzilla/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ def compute_workflow(self):
if not step.finished:
break

self.run.step_next()
if self.run.steps.current_step_index < len(self.run.steps.all_steps) - 1:
self.run.step_next()
self.run._run_write()

def _insert_commandline_inputs(self, step):
Expand Down Expand Up @@ -123,6 +124,8 @@ def _perform_current_step(self, params=None):

def _save_plots_html(self, step):
for i, plot in enumerate(step.plots):
if isinstance(plot, bytes):
continue
plot_path = f"{self.plots_path}/{self.run.steps.current_step_index}-{step.section}-{step.operation}-{step.display_name}-{i}.html"
plot.write_html(plot_path)

Expand Down
15 changes: 4 additions & 11 deletions protzilla/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import plotly
from PIL import Image

from protzilla.constants.protzilla_logging import logger
from protzilla.utilities import format_trace


Expand Down Expand Up @@ -79,6 +80,7 @@ def calculate(self, steps: StepManager, inputs: dict) -> None:

self.validate_outputs()
except NotImplementedError as e:
logger.exception(e)
self.messages.append(
dict(
level=logging.ERROR,
Expand All @@ -87,6 +89,7 @@ def calculate(self, steps: StepManager, inputs: dict) -> None:
)
)
except ValueError as e:
logger.exception(e)
self.messages.append(
dict(
level=logging.ERROR,
Expand All @@ -95,24 +98,14 @@ def calculate(self, steps: StepManager, inputs: dict) -> None:
)
)
except TypeError as e:
logger.exception(e)
self.messages.append(
dict(
level=logging.ERROR,
msg=f"Please check the implementation of this steps method class (especially the input_keys): {e}.",
trace=format_trace(traceback.format_exception(e)),
)
)
except Exception as e:
self.messages.append(
dict(
level=logging.ERROR,
msg=(
f"An error occurred while calculating this step: {e.__class__.__name__} {e} "
f"Please check your parameters or report a potential programming issue."
),
trace=format_trace(traceback.format_exception(e)),
)
)

def method(self, **kwargs) -> dict:
raise NotImplementedError("This method must be implemented in a subclass.")
Expand Down
146 changes: 146 additions & 0 deletions tests/proteinGroups_small.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/protzilla/importing/test_metadata_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_metadata_import_diann(run_empty):
test_metadata, run_empty.current_outputs["metadata_df"]
)
pd.testing.assert_frame_equal(
test_protein_df, run_empty.steps.get_step_output(DiannImport, "protein_df")
test_protein_df, run_empty.current_outputs["protein_df"]
)


Expand Down
140 changes: 103 additions & 37 deletions tests/protzilla/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
sys.path.append(f"{PROJECT_PATH}")

from protzilla.runner import Runner, _serialize_graphs
from runner_cli import args_parser
from protzilla.steps import Output, Plots
from runner_cli import args_parser


@pytest.fixture
def ms_data_path():
return "tests/proteinGroups_small_cut.txt"
return "tests/proteinGroups_small.txt"


@pytest.fixture
Expand All @@ -43,7 +43,8 @@ def mock_current_parameters(*args, **kwargs):

# side effect to mark the step as finished
runner.run.current_step.output = Output(
{key: "mock_output_value" for key in runner.run.current_step.output_keys})
{key: "mock_output_value" for key in runner.run.current_step.output_keys}
)
if len(runner.run.current_step.output_keys) == 0:
runner.run.current_step.plots = Plots(["mock_plot"])

Expand All @@ -69,7 +70,7 @@ def test_runner_imports(
monkeypatch, tests_folder_name, ms_data_path, metadata_path, peptide_path
):
importing_args = [
"standard", # expects max-quant import, metadata import
".standard", # expects max-quant import, metadata import
ms_data_path,
f"--run_name={tests_folder_name}/test_runner_{random_string()}",
f"--meta_data_path={metadata_path}",
Expand All @@ -88,34 +89,87 @@ def test_runner_imports(
runner.compute_workflow()

expected_methods = [
'MaxQuantImport',
'MetadataImport',
'FilterProteinsBySamplesMissing',
'FilterSamplesByProteinIntensitiesSum',
'ImputationByKNN',
'OutlierDetectionByLocalOutlierFactor',
'NormalisationByMedian',
'TransformationLog',
'PlotProtQuant',
'DifferentialExpressionTTest',
'PlotVolcano',
'EnrichmentAnalysisGOAnalysisWithString',
'PlotGOEnrichmentBarPlot'
"MaxQuantImport",
"MetadataImport",
"FilterProteinsBySamplesMissing",
"FilterSamplesByProteinIntensitiesSum",
"ImputationByKNN",
"OutlierDetectionByLocalOutlierFactor",
"NormalisationByMedian",
"TransformationLog",
"PlotProtQuant",
"DifferentialExpressionTTest",
"PlotVolcano",
"EnrichmentAnalysisGOAnalysisWithString",
"PlotGOEnrichmentBarPlot",
]
expected_method_parameters = [
call({'intensity_name': 'iBAQ', 'map_to_uniprot': False, 'file_path': 'tests/proteinGroups_small_cut.txt'}),
call({'feature_orientation': 'Columns (samples in rows, features in columns)', 'file_path': 'tests/metadata_cut_columns.csv'}),
call({'percentage': 0.5}),
call({'deviation_threshold': 2.0}),
call({'number_of_neighbours': 5}),
call({'number_of_neighbors': 20}),
call({'percentile': 0.5}),
call({'log_base': 'log2'}),
call({'similarity_measure': 'euclidean distance'}),
call({'alpha': 0.05}),
call({'fc_threshold': 1}),
call({'differential_expression_threshold': 1, 'direction': 'both', 'gene_sets_restring': [], 'organism': 9606}),
call({'colors': [], 'cutoff': 0.05, 'gene_sets': ['Process', 'Component', 'Function', 'KEGG'], 'top_terms': 10, 'value': 'p-value'})
call(
{
"intensity_name": "iBAQ",
"map_to_uniprot": False,
"file_path": "tests/proteinGroups_small.txt",
}
),
call(
{
"feature_orientation": "Columns (samples in rows, features in columns)",
"file_path": "tests/metadata_cut_columns.csv",
}
),
call({"percentage": 0.5}),
call({"deviation_threshold": 2.0}),
call({"number_of_neighbours": 5}),
call({"number_of_neighbors": 20}),
call({"percentile": 0.5}),
call({"log_base": "log2"}),
call(
{
"input_df": "TransformationLog_1",
"protein_group": "P10636;P10636-9",
"similarity": 1,
"similarity_measure": "euclidean distance",
}
),
call(
{
"alpha": 0.05,
"group1": "AD",
"group2": "CTR",
"grouping": "Group",
"multiple_testing_correction_method": "Benjamini-Hochberg",
"protein_df": "TransformationLog_1",
"ttest_type": "Welch's t-Test",
}
),
call(
{
"fc_threshold": 1,
"input_dict": "DifferentialExpressionTTest_1",
"proteins_of_interest": [],
}
),
call(
{
"background_path": None,
"differential_expression_threshold": 1,
"direction": "both",
"gene_sets_restring": ["Component", "Function", "KEGG", "Process"],
"organism": 9606,
"protein_df": "DifferentialExpressionTTest_1",
}
),
call(
{
"colors": [],
"cutoff": 0.05,
"gene_sets": ["Process", "Component", "Function", "KEGG"],
"input_df_step_instance": "EnrichmentAnalysisGOAnalysisWithString_1",
"title": None,
"top_terms": 10,
"value": "p-value",
}
),
]

assert mock_method.call_count == 13
Expand Down Expand Up @@ -168,10 +222,20 @@ def test_runner_calculates(monkeypatch, tests_folder_name, ms_data_path, metadat
"FilterProteinsBySamplesMissing",
]
assert mock_method.call_args_list == [
call({'intensity_name': 'iBAQ', 'map_to_uniprot': False, 'file_path': 'tests/proteinGroups_small_cut.txt'}),
call({'feature_orientation': 'Columns (samples in rows, features in columns)',
'file_path': 'tests/metadata_cut_columns.csv'}),
call({'percentage': 0.5})
call(
{
"intensity_name": "iBAQ",
"map_to_uniprot": False,
"file_path": "tests/proteinGroups_small.txt",
}
),
call(
{
"feature_orientation": "Columns (samples in rows, features in columns)",
"file_path": "tests/metadata_cut_columns.csv",
}
),
call({"percentage": 0.5}),
]
mock_plot.assert_not_called()

Expand Down Expand Up @@ -251,11 +315,13 @@ def test_serialize_workflow_graphs():
assert _serialize_graphs(step["graphs"]) == serial_filter_graphs


def test_integration_runner(metadata_path, ms_data_path, tests_folder_name, monkeypatch):
def test_integration_runner(
metadata_path, ms_data_path, tests_folder_name, monkeypatch
):
name = tests_folder_name + "/test_runner_integration_" + random_string()
runner = Runner(
**{
"workflow": "standard",
"workflow": ".standard",
"ms_data_path": f"{PROJECT_PATH}/{ms_data_path}",
"meta_data_path": f"{PROJECT_PATH}/{metadata_path}",
"peptides_path": None,
Expand All @@ -276,7 +342,7 @@ def test_integration_runner_no_plots(metadata_path, ms_data_path, tests_folder_n
name = tests_folder_name + "/test_runner_integration" + random_string()
runner = Runner(
**{
"workflow": "standard",
"workflow": ".standard",
"ms_data_path": f"{PROJECT_PATH}/{ms_data_path}",
"meta_data_path": f"{PROJECT_PATH}/{metadata_path}",
"peptides_path": None,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_data/DIANN_data/correct_metadata_table.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MS run,sample name,replicate
MS run,Sample,replicate
LM07061,24h wt DMSO,1
LM07062,24h ko DMSO,1
LM07063,24h wt Prodi,1
Expand Down
Loading