Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d1ba888
#177, updated package version
MohammedFCIS Sep 16, 2025
5b7c945
#177,added data.table dependency and exported new parser
MohammedFCIS Sep 28, 2025
6a7ca57
#177, added new onside parser
MohammedFCIS Sep 28, 2025
705d820
#177, refactored "add_database_info" to be more generic
MohammedFCIS Sep 28, 2025
2d15fee
#177, added dvobject metainfo for onside
MohammedFCIS Sep 28, 2025
65e72eb
#177, added dvobject metainfo for onside
MohammedFCIS Sep 28, 2025
358dd4c
Merge branch 'OnSIDES-database' of https://github.com/ropensci/dbpars…
MohammedFCIS Sep 28, 2025
52e925f
#177, add merge drugbank and onsides databases function
MohammedFCIS Oct 5, 2025
7c47d46
#177, updated show dvobject meta data function
MohammedFCIS Oct 5, 2025
21926e2
- Fixed unit tests
MohammedFCIS Oct 5, 2025
5250420
#177, added missed imports
MohammedFCIS Oct 6, 2025
2b87ca5
#177, fixed global variables issue
MohammedFCIS Oct 6, 2025
a82577f
#177, fixed function param documentation
MohammedFCIS Oct 6, 2025
beb2039
#177, imported null operator from purrr and fixed CMD warning
MohammedFCIS Oct 8, 2025
929e677
#177, Added util functions and docs
MohammedFCIS Nov 25, 2025
a9a2ecd
#177, add drugbank subset function unit tests and added few fixes
agenius-mohammed-ali Nov 30, 2025
fa3561d
- Added object metadata
agenius-mohammed-ali Nov 30, 2025
8b1136f
#177, Added subset dvobject metadata unit tests
agenius-mohammed-ali Nov 30, 2025
cc13331
#177, added onside subset function
agenius-mohammed-ali Nov 30, 2025
056ab75
#177, add onside subset function unit test
agenius-mohammed-ali Nov 30, 2025
03a3c39
#177, fixed issues and added unit tests
agenius-mohammed-ali Nov 30, 2025
5f3d210
#177, Added drugbank onside tutorial
agenius-mohammed-ali Dec 6, 2025
0970d71
#177, updated add_database_info documentation
agenius-mohammed-ali Dec 8, 2025
ab29013
#177, fixed CMD notes
agenius-mohammed-ali Dec 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: dbparser
Title: Drugs Databases Parser
Version: 2.0.3.9000
Version: 2.1.0.9001
Authors@R:
c(
person("Mohammed", "Ali", email = "[email protected]", role = c("aut", "cre")),
Expand All @@ -12,13 +12,15 @@ Description: This tool is for parsing public drug databases such as 'DrugBank' X
License: MIT + file LICENSE
Encoding: UTF-8
Imports:
data.table,
dplyr,
progress,
purrr,
tibble,
tools,
utils,
XML
RoxygenNote: 7.2.3
RoxygenNote: 7.3.3
Suggests:
canvasXpress,
knitr,
Expand Down
15 changes: 15 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# Generated by roxygen2: do not edit by hand

export(add_database_info)
export(cett_nodes_options)
export(drug_node_options)
export(merge_drugbank_onsides)
export(parseDrugBank)
export(parseOnSIDES)
export(references_node_options)
export(show_dvobject_metadata)
export(subset_drugbank_dvobject)
export(subset_onsides_dvobject)
import(dplyr)
importFrom(XML,xmlApply)
importFrom(XML,xmlChildren)
Expand All @@ -16,7 +21,16 @@ importFrom(XML,xmlSize)
importFrom(XML,xmlToDataFrame)
importFrom(XML,xmlToList)
importFrom(XML,xmlValue)
importFrom(data.table,fread)
importFrom(dplyr,.data)
importFrom(dplyr,filter)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(progress,progress_bar)
importFrom(purrr,"%||%")
importFrom(purrr,'%>%')
importFrom(purrr,is_empty)
importFrom(purrr,is_null)
Expand All @@ -26,5 +40,6 @@ importFrom(tibble,as_tibble)
importFrom(tibble,as_tibble_row)
importFrom(tibble,tibble)
importFrom(tibble,tibble_row)
importFrom(utils,object.size)
importFrom(utils,stack)
importFrom(utils,unzip)
8 changes: 7 additions & 1 deletion R/drugbank_parser.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,13 @@ parseDrugBank <- function(db_path,
message("Completed loading DrugBank DB into memory")
message("...........................................")
pkg_env$root <- XML::xmlRoot(parsed_db)
dvobject <- add_drugbank_info(dvobject = dvobject)
dvobject <- add_database_info(dvobject = dvobject,
db_version = XML::xmlGetAttr(
node = pkg_env$root,
name = "version"),
db_exported_date = XML::xmlGetAttr(
node = pkg_env$root,
name = "exported-on"))
message("parsing drugs elements")

dvobject[["drugs"]] <- parse_drug_nodes(drug_options)
Expand Down
211 changes: 191 additions & 20 deletions R/dvobject_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,206 @@ init_dvobject <- function() {
}


#' add_drugbank_info
#' Add passed DrugBank db metadata to passed dvobject
#' add_database_info
#' Assign the passed database metadata to the passed dvobject
#'
#' @param dvobject dvobject to assign the metadata to
#' @param db_type database type (default="DrugBank")
#' @param db_version database version as string
#' @param db_exported_date database official export date
#'
#' @keywords internal
#' @return dvobject
add_drugbank_info <- function(dvobject) {
db_info <- attr(dvobject, "original_db_info")

db_info[["db_type"]] <- "DrugBank"
db_info[["db_version"]] <- XML::xmlGetAttr(node = pkg_env$root,
name = "version")
db_info[["db_exported_date"]] <- XML::xmlGetAttr(node = pkg_env$root,
name = "exported-on")
#' @export
#' @importFrom dplyr .data filter select rename mutate left_join
add_database_info <- function(dvobject,
                              db_type = "DrugBank",
                              db_version = NULL,
                              db_exported_date = NULL) {
  # Start from whatever is stored under the attribute named after `db_type`.
  # as.list() guarantees a list even when that attribute is absent (NULL):
  # `[[<-` on NULL with a length-one value would otherwise build an atomic
  # vector, on which assigning a NULL version/date fails and on which the
  # `$` access used when displaying the metadata is invalid.
  # NOTE(review): the info is read from attr(dvobject, db_type) but stored
  # under "original_db_info" -- confirm this asymmetry is intended.
  db_info <- as.list(attr(dvobject, db_type))

  db_info[["db_type"]] <- db_type
  # Assigning NULL leaves the field unset instead of failing
  db_info[["db_version"]] <- db_version
  db_info[["db_exported_date"]] <- db_exported_date

  attr(dvobject, "original_db_info") <- db_info
  class(dvobject) <- "dvobject"
  dvobject
}


#' init_dvobject
#' Returns data.frame with two columns (key, value) of dvobject attributes
#' Display dvobject Metadata
#'
#' @param dvobject - dvobject list to show related metadata
#' Displays information about the passed dvobject, including basic info,
#' database metadata, and all data.frames contained within nested lists.
#'
#' @param obj A dvobject
#' @param return_df Logical. If TRUE, returns metadata data.frame without printing.
#' Default is FALSE.
#'
#' @return Invisibly returns a data.frame containing dvobject metadata
#'
#' @examples
#' \dontrun{
#' show_dvobject_metadata(drugbank)
#' metadata <- show_dvobject_metadata(drugbank, return_df = TRUE)
#' }
#'
#' @return data.frame
#' @family utility
#' @export
show_dvobject_metadata <- function(dvobject) {
dvobject_attributes <- attr(dvobject, "original_db_info")
dvobject_attributes[["class"]] <- class(dvobject)
data.frame(Atrribute = names(dvobject_attributes), Value = stack(dvobject_attributes)[[1]])
#' @importFrom utils object.size
show_dvobject_metadata <- function(obj, return_df = FALSE) {
  # The database description lives in the object's attributes; without
  # "original_db_info" there is nothing to report.
  attrs <- attributes(obj)
  if (is.null(attrs) || is.null(attrs$original_db_info)) {
    stop("Object does not have expected database attributes (missing 'original_db_info')")
  }

  # One row per data.frame found anywhere in the object, NULL when none
  df_info <- find_dataframes_recursive(obj)

  # nrow(NULL) is NULL and data.frame() silently drops NULL columns, so the
  # count is made explicit to keep the Total_DataFrames column present.
  total_dataframes <- if (is.null(df_info)) 0L else nrow(df_info)

  # Count only top-level lists (not data.frames)
  top_level_lists <- count_top_level_lists(obj)

  # A merged object may carry the description of a second database
  second_db_info <- find_second_database(attrs)

  metadata <- build_metadata(
    attrs = attrs,
    has_second_db = second_db_info$has_second_db,
    second_db_name = second_db_info$second_db_name
  )

  # Print the report unless the caller only wants the data.frame
  if (!return_df) {
    cat("=== BASIC INFO ===\n")
    basic_info <- data.frame(
      Class = paste(attrs$class, collapse = ", "),
      Total_DataFrames = total_dataframes,
      Top_Level_Lists = top_level_lists,
      Object_Size = format(object.size(obj), units = "auto"),
      stringsAsFactors = FALSE
    )
    print(basic_info)

    cat("\n=== DATABASE METADATA ===\n")
    print(metadata, row.names = FALSE)

    cat("\n=== DATA.FRAMES ===\n")
    if (is.null(df_info)) {
      # Avoid printing a bare "NULL" when the object holds no data.frame
      cat("(none)\n")
    } else {
      print(df_info, row.names = FALSE)
    }
  }

  # The metadata is returned invisibly in both modes
  invisible(metadata)
}


# Helper Functions for Display Attributes
# These are internal functions not exported to users

#' Find All Data Frames Recursively
#'
#' Walks a (possibly nested) list and records every data.frame found in it.
#' Elements are visited by position, so unnamed elements and elements that
#' share a name are searched as well; an unnamed element is reported with a
#' positional path step such as `[[2]]`.
#'
#' @param x Object to search
#' @param prefix Current path prefix ("" for the root object)
#' @return Data.frame with one row per data.frame found (columns `Path`,
#'   `Rows`, `Cols`, `Size`), or `NULL` when none is found
#' @keywords internal
find_dataframes_recursive <- function(x, prefix = "") {
  if (is.data.frame(x)) {
    return(data.frame(
      Path = prefix,
      Rows = nrow(x),
      Cols = ncol(x),
      Size = format(object.size(x), units = "auto"),
      stringsAsFactors = FALSE
    ))
  }

  # Only non-empty lists can contain further data.frames
  if (!is.list(x) || length(x) == 0) {
    return(NULL)
  }

  # names(x) is NULL for a fully unnamed list; normalise to "" per element
  element_names <- names(x)
  if (is.null(element_names)) {
    element_names <- rep("", length(x))
  }
  element_names[is.na(element_names)] <- ""

  found <- lapply(seq_along(x), function(i) {
    name <- element_names[[i]]
    child_path <- if (nzchar(name)) {
      if (prefix == "") name else paste0(prefix, "$", name)
    } else {
      paste0(prefix, "[[", i, "]]")
    }
    find_dataframes_recursive(x[[i]], child_path)
  })

  # vapply() keeps the mask logical even when `found` is empty
  found <- found[!vapply(found, is.null, logical(1))]
  if (length(found) == 0) {
    return(NULL)
  }

  do.call(rbind, found)
}

#' Count Top Level Lists
#'
#' Counts the direct elements of `obj` that are lists but not data.frames.
#'
#' @param obj Object to analyze
#' @return Integer count of top-level lists (excluding data.frames); 0 for
#'   an empty object
#' @keywords internal
count_top_level_lists <- function(obj) {
  # vapply() always yields a logical vector, so sum() also works when `obj`
  # is empty (sapply() would return list() there and sum() would fail)
  is_plain_list <- vapply(
    obj,
    function(x) is.list(x) && !is.data.frame(x),
    logical(1)
  )
  sum(is_plain_list)
}

#' Find Second Database in Attributes
#'
#' Looks through the non-standard attributes for a list that carries the
#' fields `db_type`, `db_version` and `db_exported_date`. When several
#' attributes qualify, the last one is reported.
#'
#' @param attrs Attributes list
#' @return List with has_second_db (logical) and second_db_name (character,
#'   `NULL` when no second database is found)
#' @keywords internal
find_second_database <- function(attrs) {
  reserved_names <- c("names", "class", "original_db_info", "row.names")
  needed_fields <- c("db_type", "db_version", "db_exported_date")

  candidates <- setdiff(names(attrs), reserved_names)

  # TRUE for every candidate attribute that looks like a database description
  describes_db <- vapply(candidates, function(candidate) {
    entry <- attrs[[candidate]]
    is.list(entry) && all(needed_fields %in% names(entry))
  }, logical(1))
  matches <- candidates[describes_db]

  if (length(matches) == 0) {
    return(list(has_second_db = FALSE, second_db_name = NULL))
  }

  list(
    has_second_db = TRUE,
    second_db_name = matches[[length(matches)]]
  )
}

#' Build Metadata Data Frame
#'
#' Produces one row per database with the columns `Database`, `Type`,
#' `Version` and `Export_Date`; a missing field is reported as "Unknown".
#' With a second database the rows are labelled "First Database" (taken from
#' the `DrugBankDB` attribute) and "Second Database"; otherwise a single
#' "Original" row is built from `original_db_info`.
#'
#' @param attrs Attributes list
#' @param has_second_db Logical indicating if second database exists
#' @param second_db_name Name of second database attribute
#' @return Data.frame with database metadata
#' @keywords internal
#' @importFrom purrr %||%
build_metadata <- function(attrs, has_second_db, second_db_name) {
  # One-row description of a single database info list
  describe_db <- function(label, info) {
    data.frame(
      Database = label,
      Type = info$db_type %||% "Unknown",
      Version = info$db_version %||% "Unknown",
      Export_Date = info$db_exported_date %||% "Unknown",
      stringsAsFactors = FALSE
    )
  }

  if (has_second_db) {
    return(rbind(
      describe_db("First Database", attrs$DrugBankDB),
      describe_db("Second Database", attrs[[second_db_name]])
    ))
  }

  describe_db("Original", attrs$original_db_info)
}
Loading