mpc-bioinformatics · KarinSchork · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026 · Apr 23, 2026
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -14,7 +14,6 @@ Description: Functionality to create and characterize bipartite graphs that
 License: BSD_3_clause + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.3
 biocViews: Proteomics, GraphAndNetwork, MassSpectrometry, MultipleComparison, Visualization
 Imports: 
     checkmate,
@@ -36,8 +35,10 @@ Suggests:
     BiocStyle,
     knitr,
     pak, 
-    RefManageR
+    RefManageR, 
+    sessioninfo
 Config/testthat/edition: 3
 VignetteBuilder: knitr
 Depends: 
     igraph (>= 2.0.0)
+Config/roxygen2/version: 8.0.0
diff --git a/R/01_peptideInput.R b/R/01_peptideInput.R
@@ -4,7 +4,7 @@
 #
 
 
-#' Helper function that extracts the itensitie columns and columns of interest 
+#' Helper function that extracts the intensity columns and columns of interest
 #' from a given dataframe.
 #'
 #' @param D                         \strong{data.frame} \cr
@@ -16,7 +16,7 @@
 #'                                  If \code{TRUE}, "Intensity." or
 #'                                  "LFQ.intensity." are removed
 #' @return returns intensity dataframe
-#' 
+#'
 #' @importFrom stringr str_replace
 #'
 .extractIntensities <- function(D, col_pattern, rename_columns){
@@ -54,16 +54,16 @@
 #'                                  except peptide sequence and intensities
 #' @param verbose                   \strong{logical} \cr
 #'                                  If \code{TRUE}, additional information on
-#'                                  the data extraction is 
+#'                                  the data extraction is
 #'                                  printed
 #' @return A SummarizedExperiment with intensities, sequences, and optional data
 #'         for the rowData dataframe.
 #' @export
 #'
 #' @examples
-#' file <- system.file("extdata", "peptides.txt", package = "bppg")
+#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
 #' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
-#' 
+#'
 #' @importFrom checkmate assertFileExists assertFlag assertVector
 #' @importFrom utils read.table
 #' @importFrom SummarizedExperiment SummarizedExperiment
@@ -87,15 +87,21 @@ readMqPeptideTable <- function(path, group = NULL, LFQ = FALSE,
     rownames(D) <- D$Sequence
 
     ## remove decoy entries:
-    ind_decoy <- D$Reverse == "+"
-    D <- D[!ind_decoy, ]
+    ind_decoy <- NULL
+    if (!all(is.na(D$Reverse))) {
+        ind_decoy <- (D$Reverse == "+")
+        D <- D[!ind_decoy, ]
+    }
+
     if (verbose) print(paste0("Removed ", sum(ind_decoy), " decoy sequences."))
 
-    ind_cont <- D$Potential.contaminant == "+"
     if (remove_contaminants) {
-        D <- D[!ind_cont, ]
-        if (verbose) print(paste0("Removed ", sum(ind_cont),
-                " contaminant sequences."))
+        if (!all(is.na(D$Potential.contaminant))) {
+            ind_cont <- D$Potential.contaminant == "+"
+            D <- D[!ind_cont, ]
+            if (verbose) print(paste0("Removed ", sum(ind_cont),
+                                      " contaminant sequences."))
+        }
     }
 
     if (LFQ) {
@@ -127,6 +133,6 @@ readMqPeptideTable <- function(path, group = NULL, LFQ = FALSE,
         rowDF <- data.frame(Sequence = D$Sequence, further_columns)
     }
     return(SummarizedExperiment::SummarizedExperiment(
-        assays = list(intensities=intensities), 
+        assays = list(intensities=intensities),
         colData = colDF, rowData = rowDF))
 }
diff --git a/R/02_peptidePreprocessing.R b/R/02_peptidePreprocessing.R
@@ -41,11 +41,11 @@
 #' Aggregate replicates of the same experimental group.
 #'
 #' @param D              \strong{SummarizedExperiment} \cr
-#'                       SummarizedExperiment Dataset containing peptide 
+#'                       SummarizedExperiment Dataset containing peptide
 #'                       intensities, e.g. the result of 
 #'                       [normalizePeptideIntensities].
 #' @param group          \strong{character factor} \cr
-#'                       The groups per sample for aggregation, if not already 
+#'                       The groups per sample for aggregation, if not already
 #'                       in SummarizedExperiment::colData(D)$group.
 #' @param missing.limit  \strong{numeric} \cr
 #'                       The proportion of missing values that is allowed
@@ -63,19 +63,19 @@
 #' @export
 #'
 #' @examples
-#' file <- system.file("extdata", "peptides.txt", package = "bppg")
-#' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
-#' D_norm <- bppg::normalizePeptideIntensities(D)
+#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
 #' group <- factor(rep(1:9, each = 3))
-#' aggregateReplicates(D_norm, group = group)
-#' 
-#' @importFrom checkmate assertCharacter assertClass assertDataFrame 
-#' assertFactor assertNumber 
+#' D <- readMqPeptideTable(path = file, group = group, LFQ = TRUE, remove_contaminants = FALSE)
+#' D_norm <- bppg::normalizePeptideIntensities(D)
+#' aggregateReplicates(D_norm)
+#'
+#' @importFrom checkmate assertCharacter assertClass assertDataFrame
+#' assertFactor assertNumber
 #' @importFrom SummarizedExperiment assays colData rowData SummarizedExperiment
 
 aggregateReplicates <- function(D,
-    group = NULL, 
-    missing.limit = 0, 
+    group = NULL,
+    missing.limit = 0,
     method = "mean",
     seq_col = "Sequence") {
     checkmate::assertClass(D, "SummarizedExperiment")
@@ -130,12 +130,13 @@ aggregateReplicates <- function(D,
 #' @export
 #'
 #' @examples
-#' file <- system.file("extdata", "peptides.txt", package = "bppg")
-#' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
+#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
 #' group <- factor(rep(1:9, each = 3))
-#' dAgg <- aggregateReplicates(D, group = group)
+#' D <- readMqPeptideTable(path = file, group = group, LFQ = TRUE, remove_contaminants = FALSE)
+#' D_norm <- bppg::normalizePeptideIntensities(D)
+#' dAgg <- aggregateReplicates(D_norm)
 #' calculatePeptideRatios(dAgg)
-#' 
+#'
 #' @importFrom checkmate assertClass assertDataFrame assertVector
 #' @importFrom SummarizedExperiment assays colData rowData SummarizedExperiment
 
@@ -175,10 +176,10 @@ calculatePeptideRatios <- function(D, group_levels = NULL) {
 
 
 #' Normalization of peptide intensities
-#' 
+#'
 #' @param D              \strong{SummarizedExperiment} \cr
-#'                       SummarizedExperiment dataset containing peptide 
-#'                       intensities, e.g. 
+#'                       SummarizedExperiment dataset containing peptide
+#'                       intensities, e.g.
 #'                       [readMqPeptideTable].
 #' @param method \strong{character} \cr
 #'          The method of normalization. Options are "nonorm"
@@ -193,10 +194,10 @@ calculatePeptideRatios <- function(D, group_levels = NULL) {
 #' @export
 #'
 #' @examples
-#' file <- system.file("extdata", "peptides.txt", package = "bppg")
+#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
 #' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
 #' D_norm <- normalizePeptideIntensities(D, method = "loess")
-#' 
+#'
 #' @importFrom SummarizedExperiment assays colData rowData SummarizedExperiment
 #' @importFrom limma normalizeBetweenArrays
 #' @importFrom vsn vsn2

diff --git a/R/03_FASTAPreprocessing.R b/R/03_FASTAPreprocessing.R
@@ -17,7 +17,7 @@
 #' @param stop               \strong{integer vector} \cr
 #'                           Index of where peptides stop
 #' @param miss               \strong{integer} \cr
-#'                           which order of missed cleavages, 
+#'                           which order of missed cleavages,
 #'                           e.g. 0 = no missed cleavage
 #'
 #' @return A dataframe with information (inkl. peptide sequence and start value)
@@ -54,9 +54,10 @@
 #'
 #' @examples
 #' library(seqinr)
-#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
+#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
 #' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
 #'
+#' # digest first protein sequence in the fasta file:
 #' digested_proteins <- bppg:::.digest2(fasta[[1]])
 
 .digest2 <- function(sequence,
@@ -161,9 +162,9 @@
 #'
 #' @param fasta              \strong{list of character sequence} \cr
 #'                           A fasta file, already read into R by
-#'                           [seqinr::read.fasta()]. If several protein origins 
-#'                           are used they should be combined into one flatten  
-#'                           list and a list with the corresponding origins  
+#'                           [seqinr::read.fasta()]. If several protein origins
+#'                           are used they should be combined into one flattened
+#'                           list and a list with the corresponding origins
 #'                           should be provided for \strong{protOrigin}.
 #' @param missed_cleavages   \strong{integer} \cr
 #'                           The maximal number of missed cleavages.
@@ -178,7 +179,7 @@
 #'                           [fasta], proteins are used as index.
 #' @param verbose            \strong{logical} \cr
 #'                           If \code{TRUE}, additional information on
-#'                           each iteration of the digestion is 
+#'                           each iteration of the digestion is
 #'                           printed.
 #' @param ...                Additional arguments for [.digest2()].
 #' @inheritDotParams .digest2
@@ -192,13 +193,13 @@
 #'
 #' @examples
 #' library(seqinr)
-#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
+#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
 #' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
 #' res <- digestFASTA(fasta)
 #'
 #' @importFrom checkmate assertFlag assertInt assertList
 #' @importFrom pbapply pblapply pboptions
-#' 
+#'
 digestFASTA <- function(fasta,
     missed_cleavages = 2,
     min_aa = 6,

diff --git a/R/04_graphGeneration.R b/R/04_graphGeneration.R
@@ -20,11 +20,11 @@
 #'
 #' @examples
 #' library(seqinr)
-#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
+#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
 #' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
 #' edgelist <- digestFASTA(fasta)
 #' res <- bppg:::.getContractMapping(edgelist)
-#' 
+#'
 #' @importFrom stats aggregate
 
 .getContractMapping <- function(edgelist,
@@ -67,15 +67,15 @@
 #'
 #' @examples
 #' library(seqinr)
-#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
+#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
 #' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
 #' edgelist <- digestFASTA(fasta)
 #' vMapping <- bppg:::.getContractMapping(edgelist)
 #' G <- igraph::graph_from_edgelist(as.matrix(edgelist), directed = FALSE)
 #' igraph::V(G)[igraph::V(G)$name %in% edgelist[, 1]]$type <- TRUE
 #' igraph::V(G)[igraph::V(G)$name %in% edgelist[, 2]]$type <- FALSE
 #' res <- bppg:::.contractGraph(G, vMapping)
-#' 
+#'
 #' @importFrom igraph contract set_vertex_attr simplify V
 #' @importFrom stats na.omit
 
@@ -96,7 +96,7 @@
             match(igraph::V(G)$name[!igraph::V(G)$type],
                 vMapping$peptides$peptide)])
 
-    gColl <- igraph::contract(G, 
+    gColl <- igraph::contract(G,
         factor(stats::na.omit(igraph::V(G)$collSignature)),
         vertex.attr.comb = c)
 
@@ -107,7 +107,7 @@
     igraph::V(gColl)$type <- vapply(igraph::V(gColl)$type, "[", 1,
         FUN.VALUE = logical(1))
     igraph::V(gColl)$name <- vapply(igraph::V(gColl)$name, paste, collapse=";",
-        FUN.VALUE = character(1)) 
+        FUN.VALUE = character(1))
     # this is not ordered - > same ratio order
 
 
@@ -124,12 +124,12 @@
     if (!is.null(igraph::V(gColl)$protOrigin)) {
         if (collProtNodes) {
             igraph::V(gColl)$protOrigin[igraph::V(gColl)$type] <- vapply(
-                    igraph::V(gColl)$protOrigin[igraph::V(gColl)$type], 
+                    igraph::V(gColl)$protOrigin[igraph::V(gColl)$type],
                     function(x) { paste(unique(x), collapse = ";")
                     }, FUN.VALUE = character(1))
         }
         # igraph::V(gColl)$protOrigin[!igraph::V(gColl)$type] <- NA
-        igraph::V(gColl)$protOrigin <- vapply(igraph::V(gColl)$protOrigin, "[", 
+        igraph::V(gColl)$protOrigin <- vapply(igraph::V(gColl)$protOrigin, "[",
         1, FUN.VALUE = character(1))
     }
 
@@ -151,11 +151,12 @@
 #'
 #' @examples
 #' library(seqinr)
-#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
+#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
 #' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
 #' edgelist <- digestFASTA(fasta)
-#' res <- bppg::generateGraphsFromEdgelist(edgelist)
-#' 
+#' res <- bppg::generateGraphsFromEdgelist(edgelist, collProtNodes = TRUE,
+#'     collPeptNodes = TRUE)
+#'
 #' @importFrom igraph graph_from_edgelist set_vertex_attr V
 #'
 generateGraphsFromEdgelist <- function(edgelist,
@@ -186,7 +187,7 @@ generateGraphsFromEdgelist <- function(edgelist,
                 match(igraph::V(G)$name[!igraph::V(G)$type],
                     edgelist$peptide)])
     }
-    
+
     if (!is.null(edgelist$protOrigin)) {
         protOriginDF <- edgelist[, c("protein", "protOrigin")]
         protOriginDF <- protOriginDF[!duplicated(protOriginDF), ]
@@ -237,26 +238,26 @@ generateGraphsFromEdgelist <- function(edgelist,
 #'
 #' @examples
 #' library(seqinr)
-#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
+#' file <- system.file("extdata", "uniprot_proteome_Scerevisiae_filtered.fasta", package = "bppg")
 #' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
 #' edgelist <- digestFASTA(fasta)
 #'
-#' file <- system.file("extdata", "peptides.txt", package = "bppg")
-#' D <- readMqPeptideTable(path = file, LFQ = TRUE, remove_contaminants = FALSE)
+#' file <- system.file("extdata", "peptides_filtered.txt", package = "bppg")
 #' group <- factor(rep(1:9, each = 3))
-#' dAgg <- aggregateReplicates(D, group = group)
+#' D <- readMqPeptideTable(path = file, group = group, LFQ = TRUE, remove_contaminants = FALSE)
+#' D_norm <- bppg::normalizePeptideIntensities(D)
+#' dAgg <- aggregateReplicates(D_norm)
 #' exp_peptide_ratios <- calculatePeptideRatios(dAgg)
 #'
 #' res <- generateQuantGraphs(exp_peptide_ratios, edgelist)
 
 generateQuantGraphs <- function(exp_peptide_ratios,
     fasta_edgelist,
-    seq_column = "Sequence", 
+    seq_column = "Sequence",
     outpath = NULL,
     collProtNodes = TRUE,
     collPeptNodes = FALSE,
     suffix = "") {
-    ## How to assert? could be int, maybe if 
     checkmate::assertClass(exp_peptide_ratios, "SummarizedExperiment")
     checkmate::assertDataFrame(SummarizedExperiment::assays(
         exp_peptide_ratios)$logRatios, all.missing=FALSE)
@@ -287,9 +288,9 @@ generateQuantGraphs <- function(exp_peptide_ratios,
 
             compEdgelist <- edgelist_filtered[edgelist_filtered$peptide
                 %in% rownames(compRatio), ]
-            compEdgelist$pep_logRatio <- compRatio[match(compEdgelist$peptide, 
+            compEdgelist$pep_logRatio <- compRatio[match(compEdgelist$peptide,
                 rownames(compRatio)), 1]
-            generateGraphsFromEdgelist(compEdgelist, collProtNodes, 
+            generateGraphsFromEdgelist(compEdgelist, collProtNodes,
                 collPeptNodes)
         })