Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
2ef4fb7
new version of small test data
KarinSchork Apr 21, 2026
06e36f9
fix problem when there are no contaminants or decoys
KarinSchork Apr 21, 2026
d77dda3
work on examples (WIP)
KarinSchork Apr 21, 2026
a083ae4
work on examples
KarinSchork Apr 23, 2026
fe933d7
work on examples
KarinSchork Apr 23, 2026
e4be8a9
add isomorphism example
KarinSchork Apr 29, 2026
2eb6943
update test data for examples
KarinSchork Apr 29, 2026
02fdeca
Merge branch 'devel' into devel_Karin
KarinSchork Apr 30, 2026
1c8220d
merge and examples
KarinSchork Apr 30, 2026
76cd36b
update tests
KarinSchork Jun 2, 2026
6c86997
add linux snapshot for peptideProcessing
KarinSchork Jun 2, 2026
adc7ebe
try to fix tests
KarinSchork Jun 3, 2026
a096cc1
Merge branch 'devel_Karin' of https://github.com/mpc-bioinformatics/b…
KarinSchork Jun 3, 2026
fd404b8
Comment out r-version in test-coverage.yaml
KarinSchork Jun 3, 2026
570c9ac
change one example test case
KarinSchork Jun 8, 2026
59ee5e6
clean up github actions
KarinSchork Jun 8, 2026
155dd15
fix small bug in optimization
KarinSchork Jun 9, 2026
76fb282
fix small bug in peptideInput
KarinSchork Jun 9, 2026
15b9dc8
fix example for export
KarinSchork Jun 9, 2026
3fc1dcc
fix example for export
KarinSchork Jun 9, 2026
9fa5f87
fix example for protein elimination
KarinSchork Jun 9, 2026
1675a6d
fix typo
KarinSchork Jun 9, 2026
9a0ecb7
fix typo
KarinSchork Jun 9, 2026
6c833ec
complete description of test files
KarinSchork Jun 9, 2026
fbe0396
update documentation
KarinSchork Jun 9, 2026
405f17f
delete old test data
KarinSchork Jun 9, 2026
1db7d1f
fix export example
KarinSchork Jun 9, 2026
fc79cbc
fix docu
KarinSchork Jun 9, 2026
be15585
make test for normalization OS-agnostic
KarinSchork Jun 9, 2026
fb4b2d1
update test
KarinSchork Jun 9, 2026
c57500d
fix typo
KarinSchork Jun 9, 2026
cb76378
fix test for subgraph characteristics table
KarinSchork Jun 10, 2026
17dc52e
Clarify source of peptide intensities in documentation
KarinSchork Jun 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ Description: Functionality to create and characterize bipartite graphs that
License: BSD_3_clause + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.3
biocViews: Proteomics, GraphAndNetwork, MassSpectrometry, MultipleComparison, Visualization
Imports:
checkmate,
Expand All @@ -36,8 +35,10 @@ Suggests:
BiocStyle,
knitr,
pak,
RefManageR
RefManageR,
sessioninfo
Config/testthat/edition: 3
VignetteBuilder: knitr
Depends:
igraph (>= 2.0.0)
Config/roxygen2/version: 8.0.0
30 changes: 18 additions & 12 deletions R/01_peptideInput.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#


#' Helper function that extracts the itensitie columns and columns of interest
#' Helper function that extracts the intensity columns and columns of interest
#' from a given dataframe.
#'
#' @param D \strong{data.frame} \cr
Expand All @@ -16,7 +16,7 @@
#' If \code{TRUE}, "Intensity." or
#' "LFQ.intensity." are removed
#' @return returns intensity dataframe
#'
#'
#' @importFrom stringr str_replace
#'
.extractIntensities <- function(D, col_pattern, rename_columns){
Expand Down Expand Up @@ -54,16 +54,16 @@
#' except peptide sequence and intensities
#' @param verbose \strong{logical} \cr
#' If \code{TRUE}, additional information on
#' the data extraction is
#' the data extraction is
#' printed
#' @return A SummarizedExperiment with intensities, sequences, and optional data
#' for the rowData dataframe.
#' @export
#'
#' @examples
#' file <- system.file("extdata", "peptides.txt", package = "bppg")
#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
#' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
#'
#'
#' @importFrom checkmate assertFileExists assertFlag assertVector
#' @importFrom utils read.table
#' @importFrom SummarizedExperiment SummarizedExperiment
Expand All @@ -87,15 +87,21 @@ readMqPeptideTable <- function(path, group = NULL, LFQ = FALSE,
rownames(D) <- D$Sequence

## remove decoy entries:
ind_decoy <- D$Reverse == "+"
D <- D[!ind_decoy, ]
ind_decoy <- NULL
if (!all(is.na(D$Reverse))) {
ind_decoy <- (D$Reverse == "+")
D <- D[!ind_decoy, ]
}

if (verbose) print(paste0("Removed ", sum(ind_decoy), " decoy sequences."))
Comment thread
KarinSchork marked this conversation as resolved.

Comment thread
KarinSchork marked this conversation as resolved.
ind_cont <- D$Potential.contaminant == "+"
if (remove_contaminants) {
D <- D[!ind_cont, ]
if (verbose) print(paste0("Removed ", sum(ind_cont),
" contaminant sequences."))
if (!all(is.na(D$Potential.contaminant))) {
ind_cont <- D$Potential.contaminant == "+"
D <- D[!ind_cont, ]
if (verbose) print(paste0("Removed ", sum(ind_cont),
" contaminant sequences."))
}
}
Comment thread
KarinSchork marked this conversation as resolved.

if (LFQ) {
Expand Down Expand Up @@ -127,6 +133,6 @@ readMqPeptideTable <- function(path, group = NULL, LFQ = FALSE,
rowDF <- data.frame(Sequence = D$Sequence, further_columns)
}
return(SummarizedExperiment::SummarizedExperiment(
assays = list(intensities=intensities),
assays = list(intensities=intensities),
colData = colDF, rowData = rowDF))
}
41 changes: 21 additions & 20 deletions R/02_peptidePreprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@
#' Aggregate replicates of the same experimental group.
#'
#' @param D \strong{SummarizedExperiment} \cr
#' SummarizedExperiment Dataset containing peptide
#' SummarizedExperiment Dataset containing peptide
#' intensities, e.g. the result of
#' [normalizePeptideIntensities].

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ne, das muss normalize sein, weil es das assay $intensities_norm erwartet, das gibt es nur nach normalizePeptideIntensities (deswegen ist no norm erlaubt, was dann nur die daten dahin schiebt)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ich bin nicht sicher ob ich die Stelle überhaupt angefasst hatte, ich ändere es aber gerne :D

#' @param group \strong{character factor} \cr
#' The groups per sample for aggregation, if not already
#' The groups per sample for aggregation, if not already
#' in SummarizedExperiment::colData(D)$group.
#' @param missing.limit \strong{numeric} \cr
#' The proportion of missing values that is allowed
Expand All @@ -63,19 +63,19 @@
#' @export
#'
#' @examples
#' file <- system.file("extdata", "peptides.txt", package = "bppg")
#' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
#' D_norm <- bppg::normalizePeptideIntensities(D)
#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
#' group <- factor(rep(1:9, each = 3))
#' aggregateReplicates(D_norm, group = group)
#'
#' @importFrom checkmate assertCharacter assertClass assertDataFrame
#' assertFactor assertNumber
#' D <- readMqPeptideTable(path = file, group = group, LFQ = TRUE, remove_contaminants = FALSE)
#' D_norm <- bppg::normalizePeptideIntensities(D)
#' aggregateReplicates(D_norm)
#'
#' @importFrom checkmate assertCharacter assertClass assertDataFrame
#' assertFactor assertNumber
#' @importFrom SummarizedExperiment assays colData rowData SummarizedExperiment

aggregateReplicates <- function(D,
group = NULL,
missing.limit = 0,
group = NULL,
missing.limit = 0,
method = "mean",
seq_col = "Sequence") {
checkmate::assertClass(D, "SummarizedExperiment")
Expand Down Expand Up @@ -130,12 +130,13 @@ aggregateReplicates <- function(D,
#' @export
#'
#' @examples
#' file <- system.file("extdata", "peptides.txt", package = "bppg")
#' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
#' group <- factor(rep(1:9, each = 3))
#' dAgg <- aggregateReplicates(D, group = group)
#' D <- readMqPeptideTable(path = file, group = group, LFQ = TRUE, remove_contaminants = FALSE)
#' D_norm <- bppg::normalizePeptideIntensities(D)
#' dAgg <- aggregateReplicates(D_norm)
#' calculatePeptideRatios(dAgg)
#'
#'
#' @importFrom checkmate assertClass assertDataFrame assertVector
#' @importFrom SummarizedExperiment assays colData rowData SummarizedExperiment

Expand Down Expand Up @@ -175,10 +176,10 @@ calculatePeptideRatios <- function(D, group_levels = NULL) {


#' Normalization of peptide intensities
#'
#'
#' @param D \strong{SummarizedExperiment} \cr
#' SummarizedExperiment dataset containing peptide
#' intensities, e.g.
#' SummarizedExperiment dataset containing peptide
#' intensities, e.g.
#' [readMqPeptideTable].
#' @param method \strong{character} \cr
#' The method of normalization. Options are "nonorm"
Expand All @@ -193,10 +194,10 @@ calculatePeptideRatios <- function(D, group_levels = NULL) {
#' @export
#'
#' @examples
#' file <- system.file("extdata", "peptides.txt", package = "bppg")
#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
#' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
#' D_norm <- normalizePeptideIntensities(D, method = "loess")
#'
#'
#' @importFrom SummarizedExperiment assays colData rowData SummarizedExperiment
#' @importFrom limma normalizeBetweenArrays
#' @importFrom vsn vsn2
Expand Down
17 changes: 9 additions & 8 deletions R/03_FASTAPreprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#' @param stop \strong{integer vector} \cr
#' Index of where peptides stop
#' @param miss \strong{integer} \cr
#' which order of missed cleavages,
#' which order of missed cleavages,
#' e.g. 0 = no missed cleavage
#'
#' @return A dataframe with information (incl. peptide sequence and start value)
Expand Down Expand Up @@ -54,9 +54,10 @@
#'
#' @examples
#' library(seqinr)
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
#' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
#'
#' # digest first protein sequence in the fasta file:
#' digested_proteins <- bppg:::.digest2(fasta[[1]])

.digest2 <- function(sequence,
Expand Down Expand Up @@ -161,9 +162,9 @@
#'
#' @param fasta \strong{list of character sequence} \cr
#' A fasta file, already read into R by
#' [seqinr::read.fasta()]. If several protein origins
#' are used they should be combined into one flatten
#' list and a list with the corresponding origins
#' [seqinr::read.fasta()]. If several protein origins
#' are used they should be combined into one flattened
#' list and a list with the corresponding origins
#' should be provided for \strong{protOrigin}.
#' @param missed_cleavages \strong{integer} \cr
#' The maximal number of missed cleavages.
Expand All @@ -178,7 +179,7 @@
#' [fasta], proteins are used as index.
#' @param verbose \strong{logical} \cr
#' If \code{TRUE}, additional information on
#' each iteration of the digestion is
#' each iteration of the digestion is
#' printed.
#' @param ... Additional arguments for [.digest2()].
#' @inheritDotParams .digest2
Expand All @@ -192,13 +193,13 @@
#'
#' @examples
#' library(seqinr)
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
#' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
#' res <- digestFASTA(fasta)
#'
#' @importFrom checkmate assertFlag assertInt assertList
#' @importFrom pbapply pblapply pboptions
#'
#'
digestFASTA <- function(fasta,
missed_cleavages = 2,
min_aa = 6,
Expand Down
41 changes: 21 additions & 20 deletions R/04_graphGeneration.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
#'
#' @examples
#' library(seqinr)
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
#' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
#' edgelist <- digestFASTA(fasta)
#' res <- bppg:::.getContractMapping(edgelist)
#'
#'
#' @importFrom stats aggregate

.getContractMapping <- function(edgelist,
Expand Down Expand Up @@ -67,15 +67,15 @@
#'
#' @examples
#' library(seqinr)
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
#' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
#' edgelist <- digestFASTA(fasta)
#' vMapping <- bppg:::.getContractMapping(edgelist)
#' G <- igraph::graph_from_edgelist(as.matrix(edgelist), directed = FALSE)
#' igraph::V(G)[igraph::V(G)$name %in% edgelist[, 1]]$type <- TRUE
#' igraph::V(G)[igraph::V(G)$name %in% edgelist[, 2]]$type <- FALSE
#' res <- bppg:::.contractGraph(G, vMapping)
#'
#'
#' @importFrom igraph contract set_vertex_attr simplify V
#' @importFrom stats na.omit

Expand All @@ -96,7 +96,7 @@
match(igraph::V(G)$name[!igraph::V(G)$type],
vMapping$peptides$peptide)])

gColl <- igraph::contract(G,
gColl <- igraph::contract(G,
factor(stats::na.omit(igraph::V(G)$collSignature)),
vertex.attr.comb = c)

Expand All @@ -107,7 +107,7 @@
igraph::V(gColl)$type <- vapply(igraph::V(gColl)$type, "[", 1,
FUN.VALUE = logical(1))
igraph::V(gColl)$name <- vapply(igraph::V(gColl)$name, paste, collapse=";",
FUN.VALUE = character(1))
FUN.VALUE = character(1))
# this is not ordered - > same ratio order


Expand All @@ -124,12 +124,12 @@
if (!is.null(igraph::V(gColl)$protOrigin)) {
if (collProtNodes) {
igraph::V(gColl)$protOrigin[igraph::V(gColl)$type] <- vapply(
igraph::V(gColl)$protOrigin[igraph::V(gColl)$type],
igraph::V(gColl)$protOrigin[igraph::V(gColl)$type],
function(x) { paste(unique(x), collapse = ";")
}, FUN.VALUE = character(1))
}
# igraph::V(gColl)$protOrigin[!igraph::V(gColl)$type] <- NA
igraph::V(gColl)$protOrigin <- vapply(igraph::V(gColl)$protOrigin, "[",
igraph::V(gColl)$protOrigin <- vapply(igraph::V(gColl)$protOrigin, "[",
1, FUN.VALUE = character(1))
}

Expand All @@ -151,11 +151,12 @@
#'
#' @examples
#' library(seqinr)
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
#' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
#' edgelist <- digestFASTA(fasta)
#' res <- bppg::generateGraphsFromEdgelist(edgelist)
#'
#' res <- bppg::generateGraphsFromEdgelist(edgelist, collProtNodes = TRUE,
#' collPeptNodes = TRUE)
#'
#' @importFrom igraph graph_from_edgelist set_vertex_attr V
#'
generateGraphsFromEdgelist <- function(edgelist,
Expand Down Expand Up @@ -186,7 +187,7 @@ generateGraphsFromEdgelist <- function(edgelist,
match(igraph::V(G)$name[!igraph::V(G)$type],
edgelist$peptide)])
}

if (!is.null(edgelist$protOrigin)) {
protOriginDF <- edgelist[, c("protein", "protOrigin")]
protOriginDF <- protOriginDF[!duplicated(protOriginDF), ]
Expand Down Expand Up @@ -237,26 +238,26 @@ generateGraphsFromEdgelist <- function(edgelist,
#'
#' @examples
#' library(seqinr)
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
#' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
#' edgelist <- digestFASTA(fasta)
#'
#' file <- system.file("extdata", "peptides.txt", package = "bppg")
#' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
#' group <- factor(rep(1:9, each = 3))
#' dAgg <- aggregateReplicates(D, group = group)
#' D <- readMqPeptideTable(path = file, group = group, LFQ = TRUE, remove_contaminants = FALSE)
#' D_norm <- bppg::normalizePeptideIntensities(D)
#' dAgg <- aggregateReplicates(D_norm)
#' exp_peptide_ratios <- calculatePeptideRatios(dAgg)
#'
#' res <- generateQuantGraphs(exp_peptide_ratios, edgelist)

generateQuantGraphs <- function(exp_peptide_ratios,
fasta_edgelist,
seq_column = "Sequence",
seq_column = "Sequence",
outpath = NULL,
collProtNodes = TRUE,
collPeptNodes = FALSE,
suffix = "") {
## How to assert? could be int, maybe if
checkmate::assertClass(exp_peptide_ratios, "SummarizedExperiment")
checkmate::assertDataFrame(SummarizedExperiment::assays(
exp_peptide_ratios)$logRatios, all.missing=FALSE)
Expand Down Expand Up @@ -287,9 +288,9 @@ generateQuantGraphs <- function(exp_peptide_ratios,

compEdgelist <- edgelist_filtered[edgelist_filtered$peptide
%in% rownames(compRatio), ]
compEdgelist$pep_logRatio <- compRatio[match(compEdgelist$peptide,
compEdgelist$pep_logRatio <- compRatio[match(compEdgelist$peptide,
rownames(compRatio)), 1]
generateGraphsFromEdgelist(compEdgelist, collProtNodes,
generateGraphsFromEdgelist(compEdgelist, collProtNodes,
collPeptNodes)
})

Expand Down
Loading
Loading