diff --git a/.Rbuildignore b/.Rbuildignore index 561a653e..59863e10 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,4 +9,5 @@ ^pkgdown$ ^\.circleci$ ^\.circleci/config\.yml$ -^\.github$ \ No newline at end of file +^\.github$ +^cran-comments\.md$ diff --git a/.github/workflows/dsBase_test_suite.yaml b/.github/workflows/dsBase_test_suite.yaml new file mode 100755 index 00000000..bea80e31 --- /dev/null +++ b/.github/workflows/dsBase_test_suite.yaml @@ -0,0 +1,210 @@ +################################################################################ +# DataSHIELD GHA test suite - dsBase +# Adapted from `azure-pipelines.yml` by Roberto Villegas-Diaz +# +# Inside the root directory $(Pipeline.Workspace) will be a file tree like: +# /dsBase <- Checked out version of datashield/dsBase +# /dsBase/logs <- Where results of tests and logs are collated +# /testStatus <- Checked out version of datashield/testStatus +# +# As of Jul 2025 this takes ~ 9 mins to run. +################################################################################ +name: dsBase tests' suite + +on: + push: + schedule: + - cron: '0 0 * * 0' # Weekly + +jobs: + dsBase_test_suite: + runs-on: ubuntu-latest + timeout-minutes: 120 + permissions: + contents: write + + # These should all be constant, except TEST_FILTER. This can be used to test + # subsets of test files in the testthat directory. Options are like: + # '*' <- Run all tests. + # 'asNumericDS*' <- Run all asNumericDS tests, i.e. all the arg, etc. tests. + # '*_smk_*' <- Run all the smoke tests for all functions. + env: + TEST_FILTER: '*' + _r_check_system_clock_: 0 + WORKFLOW_ID: ${{ github.run_id }}-${{ github.run_attempt }} + PROJECT_NAME: dsBase + BRANCH_NAME: ${{ github.ref_name }} + REPO_OWNER: ${{ github.repository_owner }} + R_KEEP_PKG_SOURCE: yes + GITHUB_TOKEN: ${{ github.token || 'placeholder-token' }} + + steps: + - name: Checkout dsBase + uses: actions/checkout@v4 + with: + path: dsBase + + - name: Checkout testStatus + if: ${{ github.actor != 'nektos/act' }} # for local deployment only + uses: actions/checkout@v4 + with: + repository: ${{ env.REPO_OWNER }}/testStatus + ref: master + path: testStatus + persist-credentials: false + token: ${{ env.GITHUB_TOKEN }} + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: release + http-user-agent: release + use-public-rspm: true + + - name: Install dsBase + run: | + Rscript -e 'install.packages(c("RANN", "stringr", "lme4", "dplyr", "reshape2", "polycor", "gamlss", "gamlss.dist", "mice", "childsds", "usethis", "devtools"), dependencies = TRUE)' + R CMD INSTALL ./dsBase + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: 'c("Imports")' + extra-packages: | + any::rcmdcheck + cran::devtools + cran::git2r + cran::RCurl + cran::readr + cran::magrittr + cran::xml2 + cran::purrr + cran::dplyr + cran::stringr + cran::tidyr + cran::quarto + cran::knitr + cran::kableExtra + cran::rmarkdown + cran::downlit + needs: check + + - name: Check man files up-to-date + run: | + orig_sum=$(find man -type f | sort -u | xargs cat | md5sum) + R -e "devtools::document()" + new_sum=$(find man -type f | sort -u | xargs cat | md5sum) + if [ "$orig_sum" != "$new_sum" ]; then + echo "Your committed manual files (man/*.Rd) are out of sync with the R files. Run devtools::document() locally then commit." + exit 1 + else + echo "Documentation up-to-date." 
+ fi + working-directory: dsBase + continue-on-error: true + + - name: Run devtools::check + run: | + R -q -e "library('devtools'); devtools::check(args = c('--no-tests', '--no-examples'))" | tee ../check.Rout + grep -q "^0 errors" ../check.Rout && grep -q " 0 warnings" ../check.Rout && grep -q " 0 notes" ../check.Rout + working-directory: dsBase + continue-on-error: true + + - name: Run tests with coverage & JUnit report + run: | + mkdir -p logs + R -q -e "devtools::reload();" + R -q -e ' + write.csv( + covr::coverage_to_list( + covr::package_coverage( + type = c("none"), + code = c('"'"' + output_file <- file("test_console_output.txt"); + sink(output_file); + sink(output_file, type = "message"); + junit_rep <- testthat::JunitReporter$new(file = file.path(getwd(), "test_results.xml")); + progress_rep <- testthat::ProgressReporter$new(max_failures = 999999); + multi_rep <- testthat::MultiReporter$new(reporters = list(progress_rep, junit_rep)); + testthat::test_package("${{ env.PROJECT_NAME }}", filter = "${{ env.TEST_FILTER }}", reporter = multi_rep, stop_on_failure = FALSE)'"'"' + ) + ) + ), + "coveragelist.csv" + )' + + mv coveragelist.csv logs/ + mv test_* logs/ + grep -q " FAIL 0 " logs/test_console_output.txt + working-directory: dsBase + + - name: Check for JUnit errors + run: | + issue_count=$(sed 's/failures="0" errors="0"//' test_results.xml | grep -c errors= || true) + echo "Number of testsuites with issues: $issue_count" + sed 's/failures="0" errors="0"//' test_results.xml | grep errors= > issues.log || true + cat issues.log || true + exit $issue_count + working-directory: dsBase/logs + + - name: Write versions to file + run: | + echo "branch:${{ env.BRANCH_NAME }}" > ${{ env.WORKFLOW_ID }}.txt + echo "os:$(lsb_release -ds)" >> ${{ env.WORKFLOW_ID }}.txt + echo "R:$(R --version | head -n1)" >> ${{ env.WORKFLOW_ID }}.txt + Rscript --vanilla -e 'sessionInfo()' >> session_info_${{ env.WORKFLOW_ID }}.txt + working-directory: dsBase/logs + + - name: Parse results from testthat and covr + run: | + Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/ logs/ https://github.com/datashield/${{ env.PROJECT_NAME }}/blob/${{ env.BRANCH_NAME }} '[^-:.]+' '(?<=::)[^:]+(?=::)' + working-directory: dsBase + env: + PROJECT_NAME: ${{ env.PROJECT_NAME }} + BRANCH_NAME: ${{ env.BRANCH_NAME }} + + - name: Render report + run: | + cd testStatus + + mkdir -p new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/ + mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/ + + # Copy logs to new logs directory location + cp -rv ../${{ env.PROJECT_NAME }}/logs/* new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/ + cp -rv ../${{ env.PROJECT_NAME }}/logs/${{ env.WORKFLOW_ID }}.txt new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/ + + R -e 'input_dir <- file.path("../new/logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))' + mv source/test_report.html new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/index.html + + env: + PROJECT_NAME: ${{ env.PROJECT_NAME }} + BRANCH_NAME: ${{ env.BRANCH_NAME }} + WORKFLOW_ID: ${{ env.WORKFLOW_ID }} + + - name: Upload test logs + uses: actions/upload-artifact@v4 + with: + name: dsbase-logs + path: testStatus/new + + - name: Dump environment info + run: | + echo -e "\n#############################" + echo -e "ls /: 
######################" + ls -al . + echo -e "\n#############################" + echo -e "lscpu: ######################" + lscpu + echo -e "\n#############################" + echo -e "memory: #####################" + free -m + echo -e "\n#############################" + echo -e "env: ########################" + env + echo -e "\n#############################" + echo -e "R sessionInfo(): ############" + R -e 'sessionInfo()' + sudo apt install tree -y + tree . + diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 00000000..bfc9f4db --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,49 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + release: + types: [published] + workflow_dispatch: + +name: pkgdown.yaml + +permissions: read-all + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 00000000..0ab748d6 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,62 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + +name: test-coverage.yaml + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + print(cov) + covr::to_cobertura(cov) + shell: Rscript {0} + + - uses: codecov/codecov-action@v5 + with: + # Fail if error if not on PR, or if on PR and token is given + fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} + files: ./cobertura.xml + plugins: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index 31c7e951..f37bbbca 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,23 +1,27 @@ Package: dsBase -Title: 'DataSHIELD' Server Site Base Functions +Title: 'DataSHIELD' Server Side Base Functions Description: Base 'DataSHIELD' functions for the server side. 'DataSHIELD' is a software package which allows you to do non-disclosive federated analysis on sensitive data. 'DataSHIELD' analytic functions have been designed to only share non disclosive summary statistics, with built in automated output checking based on statistical disclosure control. With data sites setting the threshold values for - the automated output checks. -Version: 6.4.0-9000 + the automated output checks. For more details, see 'citation("dsBase")'. 
+Version: 6.4.0.9000 Authors@R: c(person(given = "Paul", family = "Burton", - role = c("aut")), + role = c("aut"), + comment = c(ORCID = "0000-0001-5799-9634")), person(given = "Rebecca", family = "Wilson", - role = c("aut")), + role = c("aut"), + comment = c(ORCID = "0000-0003-2294-593X")), person(given = "Olly", family = "Butters", - role = c("aut")), + role = c("aut"), + comment = c(ORCID = "0000-0003-0354-8461")), person(given = "Patricia", family = "Ryser-Welch", - role = c("aut")), + role = c("aut"), + comment = c(ORCID = "0000-0002-0070-0264")), person(given = "Alex", family = "Westerberg", role = c("aut")), @@ -37,6 +41,17 @@ Authors@R: c(person(given = "Paul", role = c("aut"), email = "yannick.marcon@obiba.org", comment = c(ORCID = "0000-0003-0138-2023")), + person(given = "Tom", + family = "Bishop", + role = c("aut")), + person(given = "Amadou", + family = "Gaye", + role = c("aut"), + comment = c(ORCID = "0000-0002-1180-2792")), + person(given = "Xavier", + family = "Escribà-Montagut", + role = c("aut"), + comment = c(ORCID = "0000-0003-2888-8948")), person(given = "Stuart", family = "Wheater", role = c("aut", "cre"), @@ -59,8 +74,11 @@ Imports: childsds, purrr, tibble, - tidyselect + tidyselect, + tidytable Suggests: + spelling, testthat -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Encoding: UTF-8 +Language: en-GB diff --git a/NAMESPACE b/NAMESPACE index 655cae99..9f7a5fbd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -88,6 +88,7 @@ export(matrixDimnamesDS) export(matrixInvertDS) export(matrixMultDS) export(matrixTransposeDS) +export(mdPatternDS) export(meanDS) export(meanSdGpDS) export(mergeDS) @@ -125,6 +126,8 @@ export(setSeedDS) export(skewnessDS1) export(skewnessDS2) export(sqrtDS) +export(subsetByClassDS) +export(subsetDS) export(table1DDS) export(table2DDS) export(tableDS) diff --git a/R/BooleDS.R b/R/BooleDS.R index 8d5f5a49..3a905953 100644 --- a/R/BooleDS.R +++ b/R/BooleDS.R @@ -28,7 +28,7 @@ BooleDS <- function(V1.name=NULL, V2.name=NULL, Boolean.operator.n=NULL, na.assign.text, numeric.output=TRUE){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) V1 <- eval(parse(text=V1.name), envir = parent.frame()) V2 <- eval(parse(text=V2.name), envir = parent.frame()) diff --git a/R/asDataMatrixDS.R b/R/asDataMatrixDS.R index b8f53f19..2980965d 100644 --- a/R/asDataMatrixDS.R +++ b/R/asDataMatrixDS.R @@ -1,7 +1,12 @@ #' @title asDataFrameDS a serverside assign function called by ds.asDataFrame #' @description Coerces an R object into a matrix maintaining original #' class for all columns in data.frames. -#' @details This assign function is based on the native R function \code{data.frame} +#' @details This assign function is based on the native R function \code{data.matrix} +#' If applied to a data.frame, the native R function \code{as.matrix} +#' converts all columns into character class. In contrast, if applied to +#' a data.frame the native R function \code{data.matrix} converts +#' the data.frame to a matrix but maintains all data columns in their +#' original class #' @param x.name the name of the input object to be coerced to class #' data.frame. Must be specified in inverted commas. 
But this argument is #' usually specified directly by argument of the clientside function diff --git a/R/blackBoxDS.R b/R/blackBoxDS.R index 8e7e33f1..63cf0160 100644 --- a/R/blackBoxDS.R +++ b/R/blackBoxDS.R @@ -67,8 +67,11 @@ blackBoxDS <- function(input.var.name=NULL, ######################################################## # back-up current .Random.seed and revert on.exit - old_seed <- .Random.seed - on.exit(.Random.seed <- old_seed, add = TRUE) + if (exists(x = ".Random.seed", envir = globalenv())) { + assign(x = ".old_seed", value = .Random.seed, envir = parent.frame()); + on.exit({ assign(x = ".Random.seed", value = parent.frame()$.old_seed, envir = globalenv()); remove(".old_seed", envir = parent.frame()) }, add = TRUE) + } else + on.exit(if (exists(x = ".Random.seed", envir = globalenv())) remove(".Random.seed", envir = globalenv()), add = TRUE) input.var <- eval(parse(text=input.var.name), envir = parent.frame()) diff --git a/R/blackBoxRanksDS.R b/R/blackBoxRanksDS.R index cb2fc21f..522307a8 100644 --- a/R/blackBoxRanksDS.R +++ b/R/blackBoxRanksDS.R @@ -62,8 +62,11 @@ blackBoxRanksDS <- function(input.var.name=NULL, shared.seedval){ #START FUNC ######################################################## # back-up current .Random.seed and revert on.exit - old_seed <- .Random.seed - on.exit(.Random.seed <- old_seed, add = TRUE) + if (exists(x = ".Random.seed", envir = globalenv())) { + assign(x = ".old_seed", value = .Random.seed, envir = parent.frame()); + on.exit({ assign(x = ".Random.seed", value = parent.frame()$.old_seed, envir = globalenv()); remove(".old_seed", envir = parent.frame()) }, add = TRUE) + } else + on.exit(if (exists(x = ".Random.seed", envir = globalenv())) remove(".Random.seed", envir = globalenv()), add = TRUE) input.var <- eval(parse(text=input.var.name), envir = parent.frame()) input.global.ranks<-input.var diff --git a/R/cbindDS.R b/R/cbindDS.R index 5d5464e7..b7864864 100644 --- a/R/cbindDS.R +++ b/R/cbindDS.R @@ -24,7 +24,7 @@ cbindDS <- function(x.names.transmit=NULL, colnames.transmit=NULL){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) x.names.input <- x.names.transmit x.names.act1 <- unlist(strsplit(x.names.input, split=",")) diff --git a/R/checkNegValueDS.R b/R/checkNegValueDS.R index 3f46f3fa..48629138 100644 --- a/R/checkNegValueDS.R +++ b/R/checkNegValueDS.R @@ -1,7 +1,7 @@ #' #' @title Checks if a numeric variable has negative values #' @description this function is only called by the client function \code{ds.glm}. -#' @details if a user sets the parameter 'weights' on the client site function \code{ds.glm} this +#' @details if a user sets the parameter 'weights' on the client side function \code{ds.glm} this #' server side function is called to verify that the 'weights' vector does not have negative values #' because no negative are allowed in weights. #' @param weights a numeric vector diff --git a/R/dataFrameDS.R b/R/dataFrameDS.R index a3e06f4c..7a2b36be 100644 --- a/R/dataFrameDS.R +++ b/R/dataFrameDS.R @@ -40,7 +40,7 @@ dataFrameDS <- function(vectors=NULL, r.names=NULL, ch.rows=FALSE, ch.names=TRUE, clnames=NULL, strAsFactors=TRUE, completeCases=FALSE){ # Check Permissive Privacy Control Level. 
- dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) ######################################################################### # DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS diff --git a/R/dataFrameSortDS.R b/R/dataFrameSortDS.R index 0ea3b3c1..a398a70b 100644 --- a/R/dataFrameSortDS.R +++ b/R/dataFrameSortDS.R @@ -36,7 +36,7 @@ dataFrameSortDS <- function(df.name=NULL,sort.key.name=NULL,sort.descending,sort.method){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) ######################################################################### # DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS diff --git a/R/dataFrameSubsetDS1.R b/R/dataFrameSubsetDS1.R index 232e405c..4b3c9476 100644 --- a/R/dataFrameSubsetDS1.R +++ b/R/dataFrameSubsetDS1.R @@ -50,7 +50,7 @@ dataFrameSubsetDS1 <- function(df.name=NULL,V1.name=NULL,V2.name=NULL,Boolean.operator.n=NULL,keep.cols=NULL,rm.cols=NULL,keep.NAs=NULL){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) ######################################################################### # DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS diff --git a/R/dataFrameSubsetDS2.R b/R/dataFrameSubsetDS2.R index 3a59602f..05938a60 100644 --- a/R/dataFrameSubsetDS2.R +++ b/R/dataFrameSubsetDS2.R @@ -59,7 +59,7 @@ dataFrameSubsetDS2<-function(df.name=NULL,V1.name=NULL, V2.name=NULL, Boolean.operator.n=NULL,keep.cols=NULL, rm.cols=NULL, keep.NAs=NULL){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) ######################################################################### # DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS # diff --git a/R/densityGridDS.R b/R/densityGridDS.R index 786302f7..1569d5c1 100644 --- a/R/densityGridDS.R +++ b/R/densityGridDS.R @@ -1,6 +1,6 @@ #' #' @title Generates a density grid with or without a priori defined limits -#' @description Generates a density grid that can then be used for heatmap or countour plots. +#' @description Generates a density grid that can then be used for heatmap or contour plots. #' @details Invalid cells (cells with count < to the set filter value for the minimum allowed #' counts in table cells) are turn to 0. #' @param xvect a numerical vector diff --git a/R/dmtC2SDS.R b/R/dmtC2SDS.R index 5d315e3d..9b4329ad 100644 --- a/R/dmtC2SDS.R +++ b/R/dmtC2SDS.R @@ -22,10 +22,10 @@ #' @param ncols.transmit specifies the number of columns in the matrix to be created. #' Fixed by the clientside function as equal to the number of columns in #' the clientside DMT to be transferred. -#' @param colnames.transmit a parser-transmissable vector specifying the name of each column +#' @param colnames.transmit a parser-transmissible vector specifying the name of each column #' in the DMT being transferred from clientside to serverside. #' Generated automatically by clientside function from colnames of clientside DMT. 
-#' @param colclass.transmit a parser-transmissable vector specifying the class of the +#' @param colclass.transmit a parser-transmissible vector specifying the class of the #' vector representing each individual column in the DMT to be transferred. #' Generated automatically by clientside function. This allows the transmission of DMTs #' containing columns with different classes.If something is going to go wrong with diff --git a/R/dsBase-package.R b/R/dsBase-package.R new file mode 100644 index 00000000..a65cf643 --- /dev/null +++ b/R/dsBase-package.R @@ -0,0 +1,6 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +## usethis namespace: end +NULL diff --git a/R/gamlssDS.R b/R/gamlssDS.R index 322f0661..533da0e1 100644 --- a/R/gamlssDS.R +++ b/R/gamlssDS.R @@ -1,5 +1,5 @@ #' -#' @title gamlssDS an aggregate function called by ds.galmss +#' @title gamlssDS an aggregate function called by ds.gamlss #' @description This function calls the gamlssDS that is a wrapper function from #' the gamlss R package. The function returns an object of class "gamlss", which #' is a generalized additive model for location, scale and shape (GAMLSS). The diff --git a/R/glmerSLMADS.assign.R b/R/glmerSLMADS.assign.R index 9e39c72b..df367ea1 100644 --- a/R/glmerSLMADS.assign.R +++ b/R/glmerSLMADS.assign.R @@ -5,7 +5,7 @@ #' The analytic work engine is the glmer function in R which sits in the lme4 package. #' glmerSLMADS.assign fits a generalized linear mixed effects model (glme) - e.g. a logistic or #' Poisson regression model including both fixed and random effects - on data -#' from each single data source and saves the regression outcomes on the serveside. +#' from each single data source and saves the regression outcomes on the serverside. #' @param formula see help for ds.glmerSLMA #' @param offset see help for ds.glmerSLMA #' @param weights see help for ds.glmerSLMA diff --git a/R/global.R b/R/global.R index 89970bc0..a3c9454e 100644 --- a/R/global.R +++ b/R/global.R @@ -2,5 +2,4 @@ utils::globalVariables(c('offset.to.use', 'weights.to.use', 'out.table.real', 'out.table.dim', 'out.table.dimnames', 'list.obj', 'mg', 'blackbox.output.df', 'blackbox.ranks.df', 'global.bounds.df', 'global.ranks.quantiles.df', 'sR4.df', - 'min.max.df','sR5.df','input.mean.sd.df','input.ranks.sd.df','RS','CG','mixed', 'x','y')) - + 'min.max.df','sR5.df','input.mean.sd.df','input.ranks.sd.df','RS','CG','mixed','x','y','.old_seed','.Random.seed')) diff --git a/R/heatmapPlotDS.R b/R/heatmapPlotDS.R index 6dccbd5b..349f57b9 100644 --- a/R/heatmapPlotDS.R +++ b/R/heatmapPlotDS.R @@ -39,8 +39,11 @@ heatmapPlotDS <- function(x, y, k, noise, method.indicator){ ################################################################### # back-up current .Random.seed and revert on.exit - old_seed <- .Random.seed - on.exit(.Random.seed <- old_seed, add = TRUE) + if (exists(x = ".Random.seed", envir = globalenv())) { + assign(x = ".old_seed", value = .Random.seed, envir = parent.frame()); + on.exit({ assign(x = ".Random.seed", value = parent.frame()$.old_seed, envir = globalenv()); remove(".old_seed", envir = parent.frame()) }, add = TRUE) + } else + on.exit(if (exists(x = ".Random.seed", envir = globalenv())) remove(".Random.seed", envir = globalenv()), add = TRUE) # Cbind the columns of the two variables and remove any rows that include NAs data.table <- cbind.data.frame(x, y) diff --git a/R/hetcorDS.R b/R/hetcorDS.R index 3cf57366..d561ce30 100644 --- a/R/hetcorDS.R +++ b/R/hetcorDS.R @@ -1,7 +1,7 @@ #' #' @title Heterogeneous 
Correlation Matrix #' @description This function is based on the hetcor function from the R package \code{polycor}. -#' @details Computes a heterogenous correlation matrix, consisting of Pearson product-moment +#' @details Computes a heterogeneous correlation matrix, consisting of Pearson product-moment #' correlations between numeric variables, polyserial correlations between numeric and ordinal #' variables, and polychoric correlations between ordinal variables. #' @param data the name of a data frame consisting of factors, ordered factors, logical variables, diff --git a/R/histogramDS1.R b/R/histogramDS1.R index a79d2f52..71ffc439 100644 --- a/R/histogramDS1.R +++ b/R/histogramDS1.R @@ -37,8 +37,11 @@ histogramDS1 <- function(xvect, method.indicator, k, noise){ ################################################################## # back-up current .Random.seed and revert on.exit - old_seed <- .Random.seed - on.exit(.Random.seed <- old_seed, add = TRUE) + if (exists(x = ".Random.seed", envir = globalenv())) { + assign(x = ".old_seed", value = .Random.seed, envir = parent.frame()); + on.exit({ assign(x = ".Random.seed", value = parent.frame()$.old_seed, envir = globalenv()); remove(".old_seed", envir = parent.frame()) }, add = TRUE) + } else + on.exit(if (exists(x = ".Random.seed", envir = globalenv())) remove(".Random.seed", envir = globalenv()), add = TRUE) # print an error message if the input vector is not a numeric if(!(is.numeric(xvect))){ diff --git a/R/histogramDS2.R b/R/histogramDS2.R index 1f7a8acc..001ab0dd 100644 --- a/R/histogramDS2.R +++ b/R/histogramDS2.R @@ -39,8 +39,11 @@ histogramDS2 <- function (xvect, num.breaks, min, max, method.indicator, k, nois ################################################################## # back-up current .Random.seed and revert on.exit - old_seed <- .Random.seed - on.exit(.Random.seed <- old_seed, add = TRUE) + if (exists(x = ".Random.seed", envir = globalenv())) { + assign(x = ".old_seed", value = .Random.seed, envir = parent.frame()); + on.exit({ assign(x = ".Random.seed", value = parent.frame()$.old_seed, envir = globalenv()); remove(".old_seed", envir = parent.frame()) }, add = TRUE) + } else + on.exit(if (exists(x = ".Random.seed", envir = globalenv())) remove(".Random.seed", envir = globalenv()), add = TRUE) if (method.indicator==1){ diff --git a/R/levelsDS.R b/R/levelsDS.R index 9bb54401..bdb374d5 100644 --- a/R/levelsDS.R +++ b/R/levelsDS.R @@ -10,7 +10,7 @@ levelsDS <- function(x){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) ################################################################## #MODULE 1: CAPTURE THE nfilter SETTINGS # diff --git a/R/lexisDS2.R b/R/lexisDS2.R index e4c08117..d63989d2 100644 --- a/R/lexisDS2.R +++ b/R/lexisDS2.R @@ -3,7 +3,7 @@ #' @description The second serverside function called by ds.lexis. #' @details This is the assign #' function which actually creates -#' the expanded dataframe containing surival data for a piecewise exponential +#' the expanded dataframe containing survival data for a piecewise exponential #' regression. 
lexisDS2 also #' carries out a series of disclosure checks and if the arguments or data fail any of #' those tests, diff --git a/R/lmerSLMADS.assign.R b/R/lmerSLMADS.assign.R index 526818a1..06b2bcd1 100644 --- a/R/lmerSLMADS.assign.R +++ b/R/lmerSLMADS.assign.R @@ -1,10 +1,10 @@ #' @title Fitting linear mixed effect models - serverside function -#' @description lmerSLMADS.assing is the same as lmerSLMADS2 which fits a linear +#' @description lmerSLMADS.assign is the same as lmerSLMADS2 which fits a linear #' mixed effects model (lme) per study and saves the outcomes in each study #' @details lmerSLMADS.assign is a serverside function called by ds.lmerSLMA on the clientside. #' The analytic work engine is the lmer function in R which sits in the lme4 package. #' lmerSLMADS.assign fits a linear mixed effects model (lme) including both fixed and random -#' effects - on data from each single data source and saves the regression outcomes on the serveside. +#' effects - on data from each single data source and saves the regression outcomes on the serverside. #' @param formula see help for ds.lmerSLMA #' @param offset see help for ds.lmerSLMA #' @param weights see help for ds.lmerSLMA diff --git a/R/mdPatternDS.R b/R/mdPatternDS.R new file mode 100644 index 00000000..0f817034 --- /dev/null +++ b/R/mdPatternDS.R @@ -0,0 +1,121 @@ +#' +#' @title Missing data pattern with disclosure control +#' @description This function is a serverside aggregate function that computes the +#' missing data pattern using mice::md.pattern and applies disclosure control to +#' prevent revealing small cell counts. +#' @details This function calls the mice::md.pattern function to generate a matrix +#' showing the missing data patterns in the input data. To ensure disclosure control, +#' any pattern counts that are below the threshold (nfilter.tab, default=3) are +#' suppressed. +#' +#' \strong{Suppression Method:} +#' +#' When a pattern count is below threshold: +#' - Row name is changed to "suppressed(<N)" where N is the threshold +#' - All pattern values in that row are set to NA +#' - Summary row is also set to NA (prevents back-calculation) +#' +#' \strong{Output Matrix Structure:} +#' +#' - Rows represent different missing data patterns (plus a summary row at the bottom) +#' - Row names contain pattern counts (or "suppressed(<N)" for invalid patterns) +#' - Columns show 1 if variable is observed, 0 if missing +#' - Last column shows total number of missing values per pattern +#' - Last row shows total number of missing values per variable +#' +#' \strong{Note for Pooling:} +#' +#' When this function is called from ds.mdPattern with type='combine', suppressed +#' patterns are excluded from pooling to prevent disclosure through subtraction. +#' This means pooled counts may underestimate the true total when patterns are +#' suppressed in some studies. +#' +#' @param x a character string specifying the name of a data frame or matrix +#' containing the data to analyze for missing patterns.
+#' @return A list containing: +#' \item{pattern}{The missing data pattern matrix with disclosure control applied} +#' \item{valid}{Logical indicating if all patterns meet disclosure requirements} +#' \item{message}{A message describing the validity status} +#' @author Xavier Escribà montagut for DataSHIELD Development Team +#' @import mice +#' @export +#' +mdPatternDS <- function(x){ + + ############################################################# + # MODULE 1: CAPTURE THE nfilter SETTINGS + thr <- dsBase::listDisclosureSettingsDS() + nfilter.tab <- as.numeric(thr$nfilter.tab) + ############################################################# + + # Parse the input data name with error handling + x.val <- tryCatch( + { + eval(parse(text=x), envir = parent.frame()) + }, + error = function(e) { + stop(paste0("Object '", x, "' does not exist on the server"), call. = FALSE) + } + ) + + # Check object class + typ <- class(x.val) + + # Check that input is a data frame or matrix + if(!("data.frame" %in% typ || "matrix" %in% typ)){ + stop(paste0("The input object must be of type 'data.frame' or 'matrix'. Current type: ", + paste(typ, collapse = ", ")), call. = FALSE) + } + + # Use x.val for further processing + x <- x.val + + # Call mice::md.pattern with plot=FALSE + pattern <- mice::md.pattern(x, plot = FALSE) + + # Apply disclosure control + # Pattern counts are stored in row names (except last row which is empty/summary) + # The last row contains variable-level missing counts + + validity <- "valid" + n_patterns <- nrow(pattern) - 1 # exclude the summary row + + if(n_patterns > 0){ + # Check pattern counts (stored in row names, excluding last row) + pattern_counts <- as.numeric(rownames(pattern)[1:n_patterns]) + + # Find patterns with counts below threshold + invalid_idx <- which(pattern_counts > 0 & pattern_counts < nfilter.tab) + + if(length(invalid_idx) > 0){ + validity <- "invalid" + + # For invalid patterns, suppress by: + # - Setting row name to "suppressed" + # - Setting all pattern values to NA + rnames <- rownames(pattern) + for(idx in invalid_idx){ + rnames[idx] <- paste0("suppressed(<", nfilter.tab, ")") + pattern[idx, ] <- NA + } + rownames(pattern) <- rnames + + # Also need to recalculate the last row (summary) if patterns were suppressed + # Set to NA to avoid disclosures + pattern[nrow(pattern), seq_len(ncol(pattern))] <- NA + } + } + + # Return the pattern with validity information + return(list( + pattern = pattern, + valid = (validity == "valid"), + message = ifelse(validity == "valid", + "Valid: all pattern counts meet disclosure requirements", + paste0("Invalid: some pattern counts below threshold (", + nfilter.tab, ") have been suppressed")) + )) +} + +#AGGREGATE FUNCTION +# mdPatternDS diff --git a/R/meanDS.R b/R/meanDS.R index b186b4a9..59d1bc4e 100644 --- a/R/meanDS.R +++ b/R/meanDS.R @@ -1,5 +1,5 @@ #' -#' @title Computes statistical mean of a vectores +#' @title Computes statistical mean of a vector #' @description Calculates the mean value. #' @details if the length of input vector is less than the set filter #' a missing value is returned. diff --git a/R/metadataDS.R b/R/metadataDS.R index 3a478d48..a416ed61 100644 --- a/R/metadataDS.R +++ b/R/metadataDS.R @@ -4,7 +4,7 @@ #' @details The function returns the metadata, obtained from attributes function. #' @param x a string character, containing the name of the specified variable #' @return a list containing the metadata. The elements of the list will depend -#' on the meatadata available. +#' on the metadata available. 
#' @author Stuart Wheater, for DataSHIELD Development Team #' @export #' diff --git a/R/minMaxRandDS.R b/R/minMaxRandDS.R index 85e4d3fc..8846147f 100644 --- a/R/minMaxRandDS.R +++ b/R/minMaxRandDS.R @@ -22,8 +22,11 @@ minMaxRandDS <- function(input.var.name){ #START FUNC # back-up current .Random.seed and revert on.exit - old_seed <- .Random.seed - on.exit(.Random.seed <- old_seed, add = TRUE) + if (exists(x = ".Random.seed", envir = globalenv())) { + assign(x = ".old_seed", value = .Random.seed, envir = parent.frame()); + on.exit({ assign(x = ".Random.seed", value = parent.frame()$.old_seed, envir = globalenv()); remove(".old_seed", envir = parent.frame()) }, add = TRUE) + } else + on.exit(if (exists(x = ".Random.seed", envir = globalenv())) remove(".Random.seed", envir = globalenv()), add = TRUE) input.var <- eval(parse(text=input.var.name), envir = parent.frame()) diff --git a/R/qlsplineDS.R b/R/qlsplineDS.R index 7b9d5e4b..3df267f8 100644 --- a/R/qlsplineDS.R +++ b/R/qlsplineDS.R @@ -15,7 +15,7 @@ #' probabilities directly (the vector is passed to argument probs of quantile). #' @param x the name of the input numeric variable #' @param q numeric, a single scalar greater or equal to 2 for a number of equal-frequency -#' intervals along x or a vector of numbers in (0; 1) specifying the quantiles explicitely. +#' intervals along x or a vector of numbers in (0; 1) specifying the quantiles explicitly. #' @param na.rm logical, whether NA should be removed when calculating quantiles, passed #' to na.rm of quantile. Default set to TRUE. #' @param marginal logical, how to parametrize the spline, see Details diff --git a/R/rangeDS.R b/R/rangeDS.R index 6cf7689c..ec9183ea 100644 --- a/R/rangeDS.R +++ b/R/rangeDS.R @@ -9,8 +9,11 @@ #' rangeDS <- function(xvect) { # back-up current .Random.seed and revert on.exit - old_seed <- .Random.seed - on.exit(.Random.seed <- old_seed, add = TRUE) + if (exists(x = ".Random.seed", envir = globalenv())) { + assign(x = ".old_seed", value = .Random.seed, envir = parent.frame()); + on.exit({ assign(x = ".Random.seed", value = parent.frame()$.old_seed, envir = globalenv()); remove(".old_seed", envir = parent.frame()) }, add = TRUE) + } else + on.exit(if (exists(x = ".Random.seed", envir = globalenv())) remove(".Random.seed", envir = globalenv()), add = TRUE) # print an error message if the input vector is not a numeric if (!(is.numeric(xvect))) { diff --git a/R/reShapeDS.R b/R/reShapeDS.R index 6dbf0654..2ec368a5 100644 --- a/R/reShapeDS.R +++ b/R/reShapeDS.R @@ -43,7 +43,7 @@ reShapeDS <- function(data.name, varying.transmit, v.names.transmit, timevar.name, idvar.name, drop.transmit, direction, sep){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) datatext <- paste0("data.frame(",data.name,")") data <- eval(parse(text=datatext), envir = parent.frame()) diff --git a/R/recodeLevelsDS.R b/R/recodeLevelsDS.R index c773e774..6878faa6 100644 --- a/R/recodeLevelsDS.R +++ b/R/recodeLevelsDS.R @@ -11,7 +11,7 @@ recodeLevelsDS <- function (x=NULL, classes=NULL){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) # check if the input vector is valid (i.e. 
meets DataSHIELD criteria) check <- isValidDS(x) diff --git a/R/recodeValuesDS.R b/R/recodeValuesDS.R index f29dc0fa..d22a7862 100644 --- a/R/recodeValuesDS.R +++ b/R/recodeValuesDS.R @@ -31,7 +31,7 @@ recodeValuesDS <- function(var.name.text=NULL, values2replace.text=NULL, new.values.text=NULL, missing=NULL){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) ############################################################# #MODULE 1: CAPTURE THE used nfilter SETTINGS diff --git a/R/repDS.R b/R/repDS.R index 4dd8a94f..61b4e3ea 100644 --- a/R/repDS.R +++ b/R/repDS.R @@ -41,7 +41,7 @@ #' help for " (see above). This parameter is usually fully defined by #' the argument in the call to \code{ds.rep} that itself calls \code{repDS}. #' @param length.out.transmit This argument fixes the length of -#' the output repetive sequence vector +#' the output repetitive sequence vector #' For behaviour see help for \code{ds.rep} and "details from native R #' help for " (see above). This parameter is usually fully defined by #' the argument in the call to \code{ds.rep} that itself calls \code{repDS}. diff --git a/R/rmDS.R b/R/rmDS.R index d7e4741d..04a3b1e1 100644 --- a/R/rmDS.R +++ b/R/rmDS.R @@ -9,7 +9,7 @@ #' fail because the effect of the function is to delete the object and so #' it would be impossible to write it anywhere. #' @param x.names.transmit, the names of the objects to be deleted converted -#' into transmissable form, a comma seperated list of character string. The +#' into transmissible form, a comma separated list of character string. The #' argument is specified via the argument of ds.rm #' @return the specified object is deleted from the serverside. If this #' is successful the message "Object successfully deleted" is returned diff --git a/R/scatterPlotDS.R b/R/scatterPlotDS.R index bf229041..ab48be10 100644 --- a/R/scatterPlotDS.R +++ b/R/scatterPlotDS.R @@ -44,8 +44,11 @@ scatterPlotDS <- function(x, y, method.indicator, k, noise){ ################################################################### # back-up current .Random.seed and revert on.exit - old_seed <- .Random.seed - on.exit(.Random.seed <- old_seed, add = TRUE) + if (exists(x = ".Random.seed", envir = globalenv())) { + assign(x = ".old_seed", value = .Random.seed, envir = parent.frame()); + on.exit({ assign(x = ".Random.seed", value = parent.frame()$.old_seed, envir = globalenv()); remove(".old_seed", envir = parent.frame()) }, add = TRUE) + } else + on.exit(if (exists(x = ".Random.seed", envir = globalenv())) remove(".Random.seed", envir = globalenv()), add = TRUE) # Cbind the columns of the two variables and remove any rows that include NAs data.table <- cbind.data.frame(x, y) diff --git a/R/subsetByClassDS.R b/R/subsetByClassDS.R new file mode 100644 index 00000000..e4063482 --- /dev/null +++ b/R/subsetByClassDS.R @@ -0,0 +1,79 @@ +#' +#' @title Breaks down a dataframe or a factor into its sub-classes +#' @description The function takes a categorical vector or dataframe as input and generates subset(s) +#' vectors or dataframes for each category. Subsets are considered invalid if they hold between 1 and +#' 4 observations. +#' @details If the input data object is a dataframe it is possible to specify the variables +#' to subset on. If a subset is not 'valid' all its the values are reported as missing (i.e. NA), +#' the name of the subsets is labelled as '_INVALID'. 
If no variables are specified to subset on, +#' the dataframe will be subset on each of its factor variables. +#' And if none of the columns holds a factor variable a message is issued as output. A message is also +#' issued as output if the input vector is not of type factor. +#' @param data a string character, the name of the dataframe or the factor vector +#' @param variables a vector of string characters, the names of the the variables to subset on. +#' @return a list which contains the subsetted datasets +#' @author Gaye, A. +#' @export +#' +subsetByClassDS <- function(data=NULL, variables=NULL){ + + # Check Permissive Privacy Control Level. + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) + + # this filter sets the minimum number of observations that are allowed + + ############################################################# + # MODULE 1: CAPTURE THE nfilter SETTINGS + thr <- dsBase::listDisclosureSettingsDS() + #nfilter.tab <- as.numeric(thr$nfilter.tab) + #nfilter.glm <- as.numeric(thr$nfilter.glm) + nfilter.subset <- as.numeric(thr$nfilter.subset) + #nfilter.string <- as.numeric(thr$nfilter.string) + ############################################################# + + # evaluate the string passed on to the function as an object + input <- eval(parse(text=data), envir = parent.frame()) + + # subsetting is carried out only of the input is of type factor or data.frame + if(is.factor(input)){ + # call the internal function that generates subsets if the input is a factor variable + Dname <- extract(data)[[2]] + output <- subsetByClassHelper1(input, Dname, nfilter.subset) + }else{ + # get the names of the variables on the dataset + varnames <- colnames(input) + + # set the number of loops depending on the number of variables specified + # if no variable was specified then loop through all the variables + if(is.null(variables)){ + loop <- c(1:dim(input)[2]) + }else{ + # if the user specified variables to subset on check if those are in the dataset to subset from, + # if none of the variables is on the dataset record a message to inform the user + indx <- which(varnames %in% variables) + + if(length(indx) > 1){ + loop <- indx + }else{ + loop <- 1 + } + } + + # loop through the variables and make a subset dataframe for each level + # of each factor variable and keep the generated subset dataframes in a list + if(length(loop) > 1){ + # call the function that gets the subsets if the user specified non or more than 1 variable + out.temp <- subsetByClassHelper2(input,loop,nfilter.subset) + subsets <- out.temp[[1]] + nonfactorvars <- out.temp[[2]] + }else{ + # call the function that gets the subsets if the user specified only one variable to subset by + out.temp <- subsetByClassHelper3(input,indx,nfilter.subset) + subsets <- out.temp[[1]] + nonfactorvars <- out.temp[[2]] + } + output <- subsets + } + + return(output) +} diff --git a/R/subsetDS.R b/R/subsetDS.R new file mode 100644 index 00000000..2ab0a14c --- /dev/null +++ b/R/subsetDS.R @@ -0,0 +1,134 @@ +#' +#' @title Generates a valid subset of a table or a vector +#' @description The function uses the R classical subsetting with squared brackets '[]' and allows also to +#' subset using a logical operator and a threshold. The object to subset from must be a vector (factor, numeric +#' or character) or a table (data.frame or matrix). 
+#' @details If the input data is a table: The user specifies the rows and/or columns to include in the subset if the input +#' object is a table; the columns can be referred to by their names. The name of a vector (i.e. a variable) can also be provided +#' with a logical operator and a threshold (see example 3). +#' If the input data is a vector: when the parameters 'rows', 'logical' and 'threshold' are all provided the last two are ignored ( +#' 'rows' has precedence over the other two parameters then). +#' If the requested subset is not valid (i.e. contains less than the allowed +#' number of observations), the subset is not generated, rather a table or a vector of missing values is generated to allow +#' for any subsequent process using the output of the function to proceed after informing the user via a message. +#' @param dt a string character, the name of the dataframe or the factor vector and the range of the subset. +#' @param complt a boolean that tells if the subset to subset should include only complete cases +#' @param rs a vector of two integers that give the range of rows de extract. +#' @param cs a vector of two integers or one or more characters; the indices of the columns to extract or the names of the columns (i.e. +#' names of the variables to extract). +#' @param lg a character, the logical parameter to use if the user wishes to subset a vector using a logical +#' operator. This parameter is ignored if the input data is not a vector. +#' @param th a numeric, the threshold to use in conjunction with the logical parameter. This parameter is ignored +#' if the input data is not a vector. +#' @param varname a character, if the input data is a table, if this parameter is provided along with the 'logical' and 'threshold' +#' parameters, a subtable is based the threshold applied to the specified variable. This parameter is however ignored if the parameter +#' 'rows' and/or 'cols' are provided. +#' @return a subset of the vector, matrix or dataframe as specified is stored on the server side +#' @author Gaye, A. +#' @export +#' +subsetDS <- function(dt=NULL, complt=NULL, rs=NULL, cs=NULL, lg=NULL, th=NULL, varname=NULL){ + + # Check Permissive Privacy Control Level. 
+ dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) + + # this filter sets the minimum number of observations that are allowed + + ############################################################# + # MODULE 1: CAPTURE THE nfilter SETTINGS + thr <- dsBase::listDisclosureSettingsDS() + nfilter.tab <- as.numeric(thr$nfilter.tab) + #nfilter.glm <- as.numeric(thr$nfilter.glm) + #nfilter.subset <- as.numeric(thr$nfilter.subset) + #nfilter.string <- as.numeric(thr$nfilter.string) + ############################################################# + + # the logical operators are given as integers change them into characters + if(!(is.null(lg))){ + if(lg == 1){lg <- ">"} + if(lg == 2){lg <- ">="} + if(lg == 3){lg <- "<"} + if(lg == 4){lg <- "<="} + if(lg == 5){lg <- "=="} + if(lg == 6){lg <- "!="} + } + + # evaluate the input data object + D <- eval(parse(text=dt), envir = parent.frame()) + + # if 'complt' is set to TRUE, get continue with a dataset with complete cases only + if(complt){ + cc <- stats::complete.cases(D) + xx <- which(cc == TRUE) + Dtemp <- D + if(is.vector(D) | is.factor(D)){ + D <- Dtemp[xx] + }else{ + D <- Dtemp[xx,] + } + } + + # carry out the subsetting + if(is.vector(D) | is.factor(D)){ # if the input data is a vector + + if(is.null(rs)){ + if(is.null(lg) | is.null(th)){ + subvect <- D + }else{ + exprs1 <- paste0("D[which(D", lg, th, ")]") + subvect <- eval(parse(text=exprs1)) + } + }else{ + subvect <- D[rs] + } + + if(length(subvect) < nfilter.tab){ + if(length(subvect) == 0){ + output <- D[-c(1:length(D))] + }else{ + temp1 <- subvect + temp1[1:length(temp1)] <- NA + output <- temp1 + } + }else{ + output <- subvect + } + }else{ # if the input data is a table + + if(!(is.null(rs)) | !(is.null(cs))){ + if(!(is.null(rs)) & !(is.null(cs))){ + subtable <- D[rs, cs] + }else{ + if(is.null(cs)){ + cs <- c(1:dim(D)[2]) + } + if(is.null(rs)){ + rs <- c(1:dim(D)[1]) + } + subtable <- D[rs,cs] + } + }else{ + if(is.null(varname)){ + subtable <- D + }else{ + idx <- which(colnames(D) == varname) + exprs2 <- paste0('D[which(D[,',idx,']', lg, th, '),]') + subtable <- eval(parse(text=exprs2)) + } + } + + if((dim(subtable)[1]) < nfilter.tab){ + if((dim(subtable)[1]) == 0){ + output <- D[-c(1:dim(D)[1]),] + }else{ + subD <- subtable + subD[] <- NA + output <- subD + } + }else{ + output <- subtable + } + } + + return(output) +} diff --git a/R/table1DDS.R b/R/table1DDS.R index d73e3867..94bff957 100644 --- a/R/table1DDS.R +++ b/R/table1DDS.R @@ -5,7 +5,7 @@ #' @details It generates a 1-dimensional tables where valid (non-disclosive) 1-dimensional tables are defined #' as data from sources where no table cells have counts between 1 and the set threshold. When the output table #' is invalid all cells but the total count are replaced by missing values. Only the total count is visible -#' on the table returned to the client site. A message is also returned with the 1-dimensional; the message +#' on the table returned to the client side. A message is also returned with the 1-dimensional; the message #' says "invalid table - invalid counts present" if the table is invalid and 'valid table' otherwise. #' @param xvect a numerical vector with discrete values - usually a factor. #' @return a list which contains two elements: 'table', the 1-dimensional table and 'message' a message which diff --git a/R/tableDS.R b/R/tableDS.R index 0ee91817..cf636c02 100644 --- a/R/tableDS.R +++ b/R/tableDS.R @@ -7,15 +7,15 @@ #' the table requested in the format specified by \code{ds.table}. 
For more #' information see help for \code{ds.table} in DataSHIELD and the \code{table} function #' in native R. -#' @param rvar.transmit is a character string (in inverted commas) specifiying the +#' @param rvar.transmit is a character string (in inverted commas) specifying the #' name of the variable defining the rows in all of the 2 dimensional #' tables that form the output. Fully specified by argument in \code{ds.table}. #' For more information see help for \code{ds.table} -#' @param cvar.transmit is a character string specifiying the +#' @param cvar.transmit is a character string specifying the #' name of the variable defining the columns in all of the 2 dimensional #' tables that form the output. Fully specified by argument in \code{ds.table}. #' For more information see help for \code{ds.table} -#' @param stvar.transmit is a character string specifiying the +#' @param stvar.transmit is a character string specifying the #' name of the variable that indexes the separate two dimensional #' tables in the output if the call specifies a 3 dimensional table. #' Fully specified by argument in \code{ds.table}. diff --git a/R/tableDS.assign.R b/R/tableDS.assign.R index 6dceba21..f7a05909 100644 --- a/R/tableDS.assign.R +++ b/R/tableDS.assign.R @@ -8,15 +8,15 @@ #' as an object named by the argument of \code{ds.table}. For more #' information see help for \code{ds.table} in DataSHIELD and the \code{table} function #' in native R. -#' @param rvar.transmit is a character string (in inverted commas) specifiying the +#' @param rvar.transmit is a character string (in inverted commas) specifying the #' name of the variable defining the rows in all of the 2 dimensional #' tables that form the output. Fully specified by argument in \code{ds.table}. #' For more information see help for \code{ds.table} -#' @param cvar.transmit is a character string specifiying the +#' @param cvar.transmit is a character string specifying the #' name of the variable defining the columns in all of the 2 dimensional #' tables that form the output. Fully specified by argument in \code{ds.table}. #' For more information see help for \code{ds.table} -#' @param stvar.transmit is a character string specifiying the +#' @param stvar.transmit is a character string specifying the #' name of the variable that indexes the separate two dimensional #' tables in the output if the call specifies a 3 dimensional table. #' Fully specified by argument in \code{ds.table}. diff --git a/R/tableDS2.R b/R/tableDS2.R index e1ab252a..3ea4803b 100644 --- a/R/tableDS2.R +++ b/R/tableDS2.R @@ -13,15 +13,15 @@ #' is nevertheless TRUE, the name for the serverside table object defaults #' to 'newObj'. Fully specified by argument in \code{ds.table}. #' For more information see help for \code{ds.table} -#' @param rvar.transmit is a character string (in inverted commas) specifiying the +#' @param rvar.transmit is a character string (in inverted commas) specifying the #' name of the variable defining the rows in all of the 2 dimensional #' tables that form the output. Fully specified by argument in \code{ds.table}. #' For more information see help for \code{ds.table} -#' @param cvar.transmit is a character string specifiying the +#' @param cvar.transmit is a character string specifying the #' name of the variable defining the columns in all of the 2 dimensional #' tables that form the output. Fully specified by argument in \code{ds.table}. 
#' For more information see help for \code{ds.table} -#' @param stvar.transmit is a character string specifiying the +#' @param stvar.transmit is a character string specifying the #' name of the variable that indexes the separate two dimensional #' tables in the output if the call specifies a 3 dimensional table. #' Fully specified by argument in \code{ds.table}. diff --git a/R/vectorDS.R b/R/vectorDS.R index 8efacf97..e1bff36b 100644 --- a/R/vectorDS.R +++ b/R/vectorDS.R @@ -10,7 +10,7 @@ #' vectorDS <- function(...){ # Check Permissive Privacy Control Level. - dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana')) + dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot')) # compute the vector's value out <- c(...) diff --git a/R/zzz.R b/R/zzz.R deleted file mode 100644 index 6586ae7c..00000000 --- a/R/zzz.R +++ /dev/null @@ -1,20 +0,0 @@ -ENV <- new.env() - -.onLoad = function(libname, pkgname) { - - #### !!! If making changes, update: .onLoad(), set_opts(), show_opts(), .check_options() - - options( - datashield.privacyLevel = 5, - default.datashield.privacyControlLevel = "banana", - default.nfilter.glm = 0.33, - default.nfilter.kNN = 3, - default.nfilter.string = 80, - default.nfilter.subset = 3, - default.nfilter.stringShort = 20, - default.nfilter.tab = 3, - default.nfilter.noise = 0.25, - default.nfilter.levels.density = 0.33, - default.nfilter.levels.max = 40 - ) -} \ No newline at end of file diff --git a/README.md b/README.md index aac29667..bf9c1ada 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,38 @@ -dsBase -====== +## dsBase: 'DataSHIELD' Server Side Base Functions -DataSHIELD server side base R library. +[![License](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.html) +[![](https://www.r-pkg.org/badges/version/dsBase?color=black)](https://cran.r-project.org/package=dsBase) +[![R build +status](https://github.com/datashield/dsBase/workflows/R-CMD-check/badge.svg)](https://github.com/datashield/dsBase/actions) +[![Codecov test coverage](https://codecov.io/gh/datashield/dsBase/graph/badge.svg)](https://app.codecov.io/gh/datashield/dsBase) -[![License](https://img.shields.io/badge/license-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.html) +## Installation -About -===== +You can install the released version of dsBase from +[CRAN](https://cran.r-project.org/package=dsBase) with: -DataSHIELD is a software package which allows you to do non-disclosive federated analysis on sensitive data. Our website (https://www.datashield.org) has in depth descriptions of what it is, how it works and how to install it. A key point to highlight is that DataSHIELD has a client-server infrastructure, so the dsBase package (https://github.com/datashield/dsBase) needs to be used in conjuction with the dsBaseClient package (https://github.com/datashield/dsBaseClient) - trying to use one without the other makes no sense. +``` r +install.packages("dsBase") +``` + +And the development version from +[GitHub](https://github.com/datashield/dsBase/) with: + + +``` r +install.packages("remotes") +remotes::install_github("datashield/dsBase", "") + +# Install v6.3.4 with the following +remotes::install_github("datashield/dsBase", "6.3.4") +``` + +For a full list of development branches, checkout https://github.com/datashield/dsBase/branches + + +## About + +DataSHIELD is a software package which allows you to do non-disclosive federated analysis on sensitive data. 
Our website (https://www.datashield.org) has in depth descriptions of what it is, how it works and how to install it. A key point to highlight is that DataSHIELD has a client-server infrastructure, so the dsBase package (https://github.com/datashield/dsBase) needs to be used in conjunction with the dsBaseClient package (https://github.com/datashield/dsBaseClient) - trying to use one without the other makes no sense. Detailed instructions on how to install DataSHIELD are at https://wiki.datashield.org/. The code here is organised as: @@ -18,3 +42,40 @@ Detailed instructions on how to install DataSHIELD are at https://wiki.datashiel | obiba CRAN | Where you probably should install DataSHIELD from. | | releases | Stable releases. | | master branch | Mostly in sync with the latest release, changes rarely. | + +## References + +[1] Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, + Avraam D, Marcon Y, Bishop T, Gaye A, Escribà Montagut X, Wheater S (2025). _dsBase: 'DataSHIELD' Server Side Base Functions_. R + package version 6.3.4. <https://doi.org/10.32614/CRAN.package.dsBase>. + +[2] Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio + M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, + Oliver Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, + Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, + Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, + Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014). + “DataSHIELD: taking the analysis to the data, not the data to the analysis.” _International + Journal of Epidemiology_, *43*(6), 1929-1944. <https://doi.org/10.1093/ije/dyu188>. + +[3] Wilson R, W. Butters O, Avraam D, Baker J, Tedds J, Turner A, Murtagh M, R. Burton P (2017). + “DataSHIELD – New Directions and Dimensions.” _Data Science Journal_, *16*(21), 1-21. + <https://doi.org/10.5334/dsj-2017-021>. + +[4] Avraam D, Wilson R, Aguirre Chan N, Banerjee S, Bishop T, Butters O, Cadman T, Cederkvist L, + Duijts L, Escribà Montagut X, Garner H, Gonçalves G, González J, Haakma S, Hartlev M, + Hasenauer J, Huth M, Hyde E, Jaddoe V, Marcon Y, Mayrhofer M, Molnar-Gabor F, Morgan A, + Murtagh M, Nestor M, Nybo Andersen A, Parker S, Pinot de Moira A, Schwarz F, + Strandberg-Larsen K, Swertz M, Welten M, Wheater S, Burton P (2024). “DataSHIELD: + mitigating disclosure risk in a multi-site federated analysis platform.” _Bioinformatics + Advances_, *5*(1), 1-21. <https://doi.org/10.1093/bioadv/vbaf046>. + +> **_Note:_** Apple Mx architecture users, please be aware that there are some numerical limitations on this platform, which lead to unexpected results when using base R packages, like stats. +> +> x <- c(0, 3, 7) +> +> 1 - cor(x, x) +> +> The above should result in a value of zero.
+> +> _Also See:_ For more details see https://cran.r-project.org/doc/FAQ/R-FAQ.html#Why-doesn_0027t-R-think-these-numbers-are-equal_003f and the bug report: https://bugs.r-project.org/show_bug.cgi?id=18941 diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 86557410..a36b42c3 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -122,7 +122,7 @@ jobs: sudo R -e "install.packages('purrr', dependencies=TRUE)" sudo R -e "install.packages('tibble', dependencies=TRUE)" sudo R -e "install.packages('tidyselect', dependencies=TRUE)" - sudo R -e "install.packages('tidyverse', dependencies=TRUE)" + sudo R -e "install.packages('tidytable', dependencies=TRUE)" displayName: 'Install all dependencies for dsBase' condition: succeeded() diff --git a/docs/404.html b/docs/404.html index cc165c31..0e809232 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,5 +1,5 @@ - + @@ -32,7 +32,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -73,12 +73,12 @@

Page not found (404)

-

Site built with pkgdown 2.1.3.

+

Site built with pkgdown 2.2.0.

diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 07b43ee3..f1cadf0d 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -1,5 +1,5 @@ -NA • dsBaseNA • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -256,11 +256,11 @@

NA

-

Site built with pkgdown 2.1.3.

+

Site built with pkgdown 2.2.0.

diff --git a/docs/authors.html b/docs/authors.html index 6ce542a0..3526a873 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,5 +1,5 @@ -Authors and Citation • dsBaseAuthors and Citation • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -40,19 +40,19 @@

Authors and Citation

  • -

    Paul Burton. Author. +

    Paul Burton. Author.

  • -

    Rebecca Wilson. Author. +

    Rebecca Wilson. Author.

  • -

    Olly Butters. Author. +

    Olly Butters. Author.

  • -

    Patricia Ryser-Welch. Author. +

    Patricia Ryser-Welch. Author.

  • @@ -75,6 +75,18 @@

    Authors and Citation

    Yannick Marcon. Author.

  • +
  • +

    Tom Bishop. Author. +

    +
  • +
  • +

    Amadou Gaye. Author. +

    +
  • +
  • +

    Xavier Escribà-Montagut. Author. +

    +
  • Stuart Wheater. Author, maintainer.

    @@ -88,15 +100,61 @@

    Citation

    -

    Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Wheater S (2025). -dsBase: 'DataSHIELD' Server Site Base Functions. -R package version 6.4.0-9000. +

    Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà-Montagut X, Wheater S (????). +dsBase: 'DataSHIELD' Server Side Base Functions. +R package version 6.4.0.9000.

    @Manual{,
    -  title = {dsBase: 'DataSHIELD' Server Site Base Functions},
    -  author = {Paul Burton and Rebecca Wilson and Olly Butters and Patricia Ryser-Welch and Alex Westerberg and Leire Abarrategui and Roberto Villegas-Diaz and Demetris Avraam and Yannick Marcon and Stuart Wheater},
    -  year = {2025},
    -  note = {R package version 6.4.0-9000},
    +  title = {dsBase: 'DataSHIELD' Server Side Base Functions},
    +  author = {Paul Burton and Rebecca Wilson and Olly Butters and Patricia Ryser-Welch and Alex Westerberg and Leire Abarrategui and Roberto Villegas-Diaz and Demetris Avraam and Yannick Marcon and Tom Bishop and Amadou Gaye and Xavier Escribà-Montagut and Stuart Wheater},
    +  note = {R package version 6.4.0.9000},
    +}
    +

    Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014). +“DataSHIELD: taking the analysis to the data, not the data to the analysis.” +International Journal of Epidemiology, 43(6), 1929–1944. +doi:10.1093/ije/dyu188. +

    +
    @Article{,
    +  title = {{DataSHIELD: taking the analysis to the data, not the data to the analysis}},
    +  author = {Amadou Gaye and Yannick Marcon and Julia Isaeva and Philippe {LaFlamme} and Andrew Turner and Elinor M Jones and Joel Minion and Andrew W Boyd and Christopher J Newby and Marja-Liisa Nuotio and Rebecca Wilson and Oliver Butters and Barnaby Murtagh and Ipek Demir and Dany Doiron and Lisette Giepmans and Susan E Wallace and Isabelle Budin-Lj{\o}sne and Carsten O. Schmidt and Paolo Boffetta and Mathieu Boniol and Maria Bota and Kim W Carter and Nick {deKlerk} and Chris Dibben and Richard W Francis and Tero Hiekkalinna and Kristian Hveem and Kirsti Kval{\o}y and Sean Millar and Ivan J Perry and Annette Peters and Catherine M Phillips and Frank Popham and Gillian Raab and Eva Reischl and Nuala Sheehan and Melanie Waldenberger and Markus Perola and Edwin {{van den Heuvel}} and John Macleod and Bartha M Knoppers and Ronald P Stolk and Isabel Fortier and Jennifer R Harris and Bruce H R Woffenbuttel and Madeleine J Murtagh and Vincent Ferretti and Paul R Burton},
    +  journal = {International Journal of Epidemiology},
    +  year = {2014},
    +  volume = {43},
    +  number = {6},
    +  pages = {1929--1944},
    +  doi = {10.1093/ije/dyu188},
    +}
    +

    Wilson R, Butters O, Avraam D, Baker J, Tedds J, Turner A, Murtagh M, Burton P (2017). +“DataSHIELD – New Directions and Dimensions.” +Data Science Journal, 16(21), 1–21. +doi:10.5334/dsj-2017-021. +

    +
    @Article{,
    +  title = {{DataSHIELD – New Directions and Dimensions}},
    +  author = {Rebecca C. Wilson and Oliver W. Butters and Demetris Avraam and James Baker and Jonathan A. Tedds and Andrew Turner and Madeleine Murtagh and Paul R. Burton},
    +  journal = {Data Science Journal},
    +  year = {2017},
    +  volume = {16},
    +  number = {21},
    +  pages = {1--21},
    +  doi = {10.5334/dsj-2017-021},
    +}
    +

    Avraam D, Wilson R, Aguirre Chan N, Banerjee S, Bishop T, Butters O, Cadman T, Cederkvist L, Duijts L, Escribà Montagut X, Garner H, Gonçalves G, González J, Haakma S, Hartlev M, Hasenauer J, Huth M, Hyde E, Jaddoe V, Marcon Y, Mayrhofer M, Molnar-Gabor F, Morgan A, Murtagh M, Nestor M, Nybo Andersen A, Parker S, Pinot de Moira A, Schwarz F, Strandberg-Larsen K, Swertz M, Welten M, Wheater S, Burton P (2024). +“DataSHIELD: mitigating disclosure risk in a multi-site federated analysis platform.” +Bioinformatics Advances, 5(1), 1–21. +doi:10.1093/bioadv/vbaf046. +

    +
    @Article{,
    +  title = {{DataSHIELD: mitigating disclosure risk in a multi-site federated analysis platform}},
    +  author = {Demetris Avraam and Rebecca C Wilson and Noemi {{Aguirre Chan}} and Soumya Banerjee and Tom R P Bishop and Olly Butters and Tim Cadman and Luise Cederkvist and Liesbeth Duijts and Xavier {{Escrib{\a`a} Montagut}} and Hugh Garner and Gon{\c c}alo {Gon{\c c}alves} and Juan R Gonz{\a'a}lez and Sido Haakma and Mette Hartlev and Jan Hasenauer and Manuel Huth and Eleanor Hyde and Vincent W V Jaddoe and Yannick Marcon and Michaela Th Mayrhofer and Fruzsina Molnar-Gabor and Andrei Scott Morgan and Madeleine Murtagh and Marc Nestor and Anne-Marie {{Nybo Andersen}} and Simon Parker and Angela {{Pinot de Moira}} and Florian Schwarz and Katrine Strandberg-Larsen and Morris A Swertz and Marieke Welten and Stuart Wheater and Paul R Burton},
    +  journal = {Bioinformatics Advances},
    +  year = {2024},
    +  volume = {5},
    +  number = {1},
    +  pages = {1--21},
    +  doi = {10.1093/bioadv/vbaf046},
    +  editor = {Thomas Lengauer},
    +  publisher = {Oxford University Press (OUP)},
     }
    @@ -106,11 +164,11 @@

    Citation

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/index.html b/docs/index.html index 81e9827b..1d86586f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1,18 +1,18 @@ - + -DataSHIELD Server Site Base Functions • dsBase +DataSHIELD Server Side Base Functions • dsBase - - + +

    +
    +install.packages("remotes")
    +remotes::install_github("datashield/dsBase", "<BRANCH>")
    +
    +# Install v6.3.4 with the following
    +remotes::install_github("datashield/dsBase", "6.3.4")
    +

    For a full list of development branches, check out https://github.com/datashield/dsBase/branches

    + +
    +

    About +

    +

    DataSHIELD is a software package which allows you to do non-disclosive federated analysis on sensitive data. Our website (https://www.datashield.org) has in depth descriptions of what it is, how it works and how to install it. A key point to highlight is that DataSHIELD has a client-server infrastructure, so the dsBase package (https://github.com/datashield/dsBase) needs to be used in conjunction with the dsBaseClient package (https://github.com/datashield/dsBaseClient) - trying to use one without the other makes no sense.

    Detailed instructions on how to install DataSHIELD are at https://wiki.datashield.org/. The code here is organised as:

    @@ -91,12 +105,34 @@

    About

    +
    +
    +

    References +

    +

    [1] Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà Montagut X, Wheater S (2025). dsBase: ‘DataSHIELD’ Server Side Base Functions. R package version 6.3.4. https://doi.org/10.32614/CRAN.package.dsBase.

    +

    [2] Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, Oliver Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014). “DataSHIELD: taking the analysis to the data, not the data to the analysis.” International Journal of Epidemiology, 43(6), 1929-1944. https://doi.org/10.1093/ije/dyu188.

    +

    [3] Wilson R, W. Butters O, Avraam D, Baker J, Tedds J, Turner A, Murtagh M, R. Burton P (2017). “DataSHIELD – New Directions and Dimensions.” Data Science Journal, 16(21), 1-21. https://doi.org/10.5334/dsj-2017-021.

    +

    [4] Avraam D, Wilson R, Aguirre Chan N, Banerjee S, Bishop T, Butters O, Cadman T, Cederkvist L, Duijts L, Escribà Montagut X, Garner H, Gonçalves G, González J, Haakma S, Hartlev M, Hasenauer J, Huth M, Hyde E, Jaddoe V, Marcon Y, Mayrhofer M, Molnar-Gabor F, Morgan A, Murtagh M, Nestor M, Nybo Andersen A, Parker S, Pinot de Moira A, Schwarz F, Strandberg-Larsen K, Swertz M, Welten M, Wheater S, Burton P (2024). “DataSHIELD: mitigating disclosure risk in a multi-site federated analysis platform.” Bioinformatics Advances, 5(1), 1-21. https://doi.org/10.1093/bioadv/vbaf046.

    +
    +

    Note: Apple Mx architecture users, please be aware that there are some numerical limitations on this platform, which lead to unexpected results when using base R packages, like stats.

    +

    x <- c(0, 3, 7)

    +

    1 - cor(x, x)

    +

    The above should result in a value of zero.

    +

    Also See: For more details see https://cran.r-project.org/doc/FAQ/R-FAQ.html#Why-doesn_0027t-R-think-these-numbers-are-equal_003f and the bug report: https://bugs.r-project.org/show_bug.cgi?id=18941
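A minimal sketch of the comparison the note above describes: in R, floating-point results are normally tested with a tolerance (for example `all.equal()`, which is base R) rather than with exact equality. The variable names below are illustrative only.

``` r
# Minimal sketch: compare floating-point results with a tolerance
# rather than exact equality (see the R FAQ linked above).
x <- c(0, 3, 7)
d <- 1 - cor(x, x)        # mathematically zero; may be a tiny non-zero value
d == 0                    # exact comparison: can be FALSE on some platforms
isTRUE(all.equal(d, 0))   # tolerance-based comparison: TRUE
```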

    +
    @@ -81,11 +81,11 @@

    Author

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/asDataFrameDS.html b/docs/reference/asDataFrameDS.html index d35f158e..21529f3b 100644 --- a/docs/reference/asDataFrameDS.html +++ b/docs/reference/asDataFrameDS.html @@ -1,5 +1,5 @@ -asDataFrameDS a serverside assign function called by ds.asDataFrame — asDataFrameDS • dsBaseasDataFrameDS a serverside assign function called by ds.asDataFrame — asDataFrameDS • dsBaseasDataFrameDS a serverside assign function called by ds.asDataFrame — asDataMatrixDS • dsBaseasDataMatrixDS a serverside assign function called by ds.asDataMatrix — asDataMatrixDS • dsBaseDetermines the levels of the input variable in each single study — asFactorDS1 • dsBaseDetermines the levels of the input variable in each single study — asFactorDS1 • dsBaseConverts a numeric vector into a factor — asFactorDS2 • dsBaseConverts a numeric vector into a factor — asFactorDS2 • dsBaseConverts a numeric vector into a factor — asFactorSimpleDS • dsBaseConverts a numeric vector into a factor — asFactorSimpleDS • dsBaseCoerces an R object into class integer — asIntegerDS • dsBaseCoerces an R object into class integer — asIntegerDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -82,11 +82,11 @@

    Author

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/asListDS.html b/docs/reference/asListDS.html index 554bbcf5..5fd009ba 100644 --- a/docs/reference/asListDS.html +++ b/docs/reference/asListDS.html @@ -1,5 +1,5 @@ -asListDS a serverside aggregate function called by ds.asList — asListDS • dsBaseasListDS a serverside aggregate function called by ds.asList — asListDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -95,11 +95,11 @@

    Author

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/asLogicalDS.html b/docs/reference/asLogicalDS.html index c715a303..d420e7e7 100644 --- a/docs/reference/asLogicalDS.html +++ b/docs/reference/asLogicalDS.html @@ -1,5 +1,5 @@ -Coerces an R object into class numeric — asLogicalDS • dsBaseCoerces an R object into class numeric — asLogicalDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -81,11 +81,11 @@

    Author

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/asMatrixDS.html b/docs/reference/asMatrixDS.html index 02493cb4..79fa77b2 100644 --- a/docs/reference/asMatrixDS.html +++ b/docs/reference/asMatrixDS.html @@ -1,5 +1,5 @@ -Coerces an R object into a matrix — asMatrixDS • dsBaseCoerces an R object into a matrix — asMatrixDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -81,11 +81,11 @@

    Author

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/asNumericDS.html b/docs/reference/asNumericDS.html index a3ea947b..99fd40de 100644 --- a/docs/reference/asNumericDS.html +++ b/docs/reference/asNumericDS.html @@ -1,5 +1,5 @@ -Coerces an R object into class numeric — asNumericDS • dsBaseCoerces an R object into class numeric — asNumericDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -82,11 +82,11 @@

    Author

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/aucDS.html b/docs/reference/aucDS.html index 99cebbfe..bdc3b9e0 100644 --- a/docs/reference/aucDS.html +++ b/docs/reference/aucDS.html @@ -1,5 +1,5 @@ -aucDS an aggregate function called by ds.auc — aucDS • dsBaseaucDS an aggregate function called by ds.auc — aucDS • dsBaseSecure ranking of "V2BR" (vector to be ranked) across all sources — blackBoxDS • dsBaseSecure ranking of "V2BR" (vector to be ranked) across all sources — blackBoxDS • dsBaseSecure ranking of "V2BR" (vector to be ranked) across all sources — blackBoxRanksDS • dsBaseSecure ranking of "V2BR" (vector to be ranked) across all sources — blackBoxRanksDS • dsBaseCreate the identity stats and necessary data to draw a plot on the client — boxPlotGGDS • dsBaseCreate the identity stats and necessary data to draw a plot on the client — boxPlotGGDS • dsBaseArrange data frame to pass it to the boxplot function — boxPlotGG_data_TreatmentDS • dsBaseArrange data frame to pass it to the boxplot function — boxPlotGG_data_TreatmentDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -84,11 +84,11 @@

    Value

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/boxPlotGG_data_Treatment_numericDS.html b/docs/reference/boxPlotGG_data_Treatment_numericDS.html index 2c27d489..b9397d80 100644 --- a/docs/reference/boxPlotGG_data_Treatment_numericDS.html +++ b/docs/reference/boxPlotGG_data_Treatment_numericDS.html @@ -1,5 +1,5 @@ -Arrange vector to pass it to the boxplot function — boxPlotGG_data_Treatment_numericDS • dsBaseArrange vector to pass it to the boxplot function — boxPlotGG_data_Treatment_numericDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -70,11 +70,11 @@

    Value

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/bp_standardsDS.html b/docs/reference/bp_standardsDS.html index 63383bfb..89765849 100644 --- a/docs/reference/bp_standardsDS.html +++ b/docs/reference/bp_standardsDS.html @@ -1,5 +1,5 @@ -Calculates Blood pressure z-scores — bp_standardsDS • dsBaseCalculates Blood pressure z-scores — bp_standardsDS • dsBaseConcatenates objects into a vector or list — cDS • dsBaseConcatenates objects into a vector or list — cDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -77,11 +77,11 @@

    Author

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/cbindDS.html b/docs/reference/cbindDS.html index 50ac432e..4df08184 100644 --- a/docs/reference/cbindDS.html +++ b/docs/reference/cbindDS.html @@ -1,5 +1,5 @@ -cbindDS called by ds.cbind — cbindDS • dsBasecbindDS called by ds.cbind — cbindDS • dsBaseChanges a reference level of a factor — changeRefGroupDS • dsBaseChanges a reference level of a factor — changeRefGroupDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -90,11 +90,11 @@

    Author

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/checkNegValueDS.html b/docs/reference/checkNegValueDS.html index 7a954087..e356a27f 100644 --- a/docs/reference/checkNegValueDS.html +++ b/docs/reference/checkNegValueDS.html @@ -1,5 +1,5 @@ -Checks if a numeric variable has negative values — checkNegValueDS • dsBaseChecks if a numeric variable has negative values — checkNegValueDS • dsBase @@ -17,7 +17,7 @@ dsBase - 6.4.0-9000 + 6.4.0.9000 @@ -61,7 +61,7 @@

    Value

    Details

    -

    if a user sets the parameter 'weights' on the client site function ds.glm this +

    if a user sets the parameter 'weights' on the client side function ds.glm this server side function is called to verify that the 'weights' vector does not have negative values because no negative values are allowed in weights.
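A hypothetical sketch of the kind of check described here; the function name and body below are made up for illustration and are not the actual dsBase implementation.

``` r
# Illustrative only -- not the real checkNegValueDS() code.
has_negative_weights <- function(weights) {
  any(weights < 0, na.rm = TRUE)   # TRUE if any weight is negative
}

has_negative_weights(c(1, 0.5, 2))    # FALSE: all weights are non-negative
has_negative_weights(c(1, -0.5, 2))   # TRUE: a negative weight is present
```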

    @@ -78,11 +78,11 @@

    Author

    -

    Site built with pkgdown 2.1.3.

    +

    Site built with pkgdown 2.2.0.

    diff --git a/docs/reference/checkPermissivePrivacyControlLevel.html b/docs/reference/checkPermissivePrivacyControlLevel.html index 4e7ca4d3..3e1038f9 100644 --- a/docs/reference/checkPermissivePrivacyControlLevel.html +++ b/docs/reference/checkPermissivePrivacyControlLevel.html @@ -1,5 +1,9 @@ +<<<<<<< HEAD checkPermissivePrivacyControlLevel — checkPermissivePrivacyControlLevel • dsBasecheckPermissivePrivacyControlLevel — checkPermissivePrivacyControlLevel • dsBase @@ -17,7 +21,11 @@ dsBase +<<<<<<< HEAD 6.4.0-9000 +======= + 6.3.4 +>>>>>>> origin/v6.3.5-dev @@ -78,7 +86,7 @@

    Author