diff --git a/.Rbuildignore b/.Rbuildignore
index b66ab898..afbc8f5c 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -17,8 +17,8 @@
^R/secure.global.ranking.md$
^_pkgdown\.yml$
^docs$
-^dsBase_6.3.5.tar.gz$
-^dsBase_6.3.5-permissive.tar.gz$
+^dsBase_7.0-dev-feat_performance\.tar\.gz$
+^dsBase_7.0-dev-feat_performance-permissive\.tar\.gz$
^dsDanger_6.3.4.tar.gz$
^\.circleci$
^\.circleci/config\.yml$
diff --git a/NAMESPACE b/NAMESPACE
index a41b8f0a..8bdab82e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -89,7 +89,6 @@ export(ds.rBinom)
export(ds.rNorm)
export(ds.rPois)
export(ds.rUnif)
-export(ds.ranksSecure)
export(ds.rbind)
export(ds.reShape)
export(ds.recodeLevels)
diff --git a/R/ds.colnames.R b/R/ds.colnames.R
index a9e80252..a4b98b1a 100644
--- a/R/ds.colnames.R
+++ b/R/ds.colnames.R
@@ -1,51 +1,51 @@
#'
#' @title Produces column names of the R object in the server-side
-#' @description Retrieves column names of an R object on the server-side.
+#' @description Retrieves column names of an R object on the server-side.
#' This function is similar to R function \code{colnames}.
-#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}.
-#'
+#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}.
+#'
#' Server function called: \code{colnamesDS}
#' @param x a character string providing the name of the input data frame or matrix.
-#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
+#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
#' If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
-#' @return \code{ds.colnames} returns the column names of
-#' the specified server-side data frame or matrix.
+#' @return \code{ds.colnames} returns the column names of
+#' the specified server-side data frame or matrix.
#' @author DataSHIELD Development Team
#' @seealso \code{\link{ds.dim}} to obtain the dimensions of a matrix or a data frame.
-#' @examples
+#' @examples
#' \dontrun{
-#'
+#'
#' ## Version 6, for version 5 see the Wiki
#' # Connecting to the Opal servers
-#'
+#'
#' require('DSI')
#' require('DSOpal')
#' require('dsBaseClient')
-#'
+#'
#' builder <- DSI::newDSLoginBuilder()
-#' builder$append(server = "study1",
-#' url = "http://192.168.56.100:8080/",
-#' user = "administrator", password = "datashield_test&",
+#' builder$append(server = "study1",
+#' url = "http://192.168.56.100:8080/",
+#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM1", driver = "OpalDriver")
-#' builder$append(server = "study2",
-#' url = "http://192.168.56.100:8080/",
-#' user = "administrator", password = "datashield_test&",
+#' builder$append(server = "study2",
+#' url = "http://192.168.56.100:8080/",
+#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM2", driver = "OpalDriver")
#' builder$append(server = "study3",
-#' url = "http://192.168.56.100:8080/",
-#' user = "administrator", password = "datashield_test&",
+#' url = "http://192.168.56.100:8080/",
+#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM3", driver = "OpalDriver")
#' logindata <- builder$build()
-#'
+#'
#' # Log onto the remote Opal training servers
-#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
-#'
+#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+#'
#' # Getting column names of the R objects stored in the server-side
#' ds.colnames(x = "D",
#' datasources = connections[1]) #only the first server ("study1") is used
#' # Clear the Datashield R sessions and logout
-#' datashield.logout(connections)
+#' datashield.logout(connections)
#' }
#' @export
#'
@@ -65,6 +65,17 @@ ds.colnames <- function(x=NULL, datasources=NULL) {
stop("Please provide the name of a data.frame or matrix!", call.=FALSE)
}
+ # check if the input object(s) is(are) defined in all the studies
+ defined <- isDefined(datasources, x)
+
+ # call the internal function that checks the input object is of the same class in all studies.
+ typ <- checkClass(datasources, x)
+
+ # if the input object is not a matrix or a dataframe stop
+ if(!('data.frame' %in% typ) & !('matrix' %in% typ)){
+ stop("The input object must be of type 'data.frame' or 'matrix'!", call.=FALSE)
+ }
+
cally <- call("colnamesDS", x)
column_names <- DSI::datashield.aggregate(datasources, cally)
diff --git a/R/ds.ranksSecure.R b/R/ds.ranksSecure.R
deleted file mode 100644
index 8ffa6a97..00000000
--- a/R/ds.ranksSecure.R
+++ /dev/null
@@ -1,585 +0,0 @@
-# ds.ranksSecure
-#' @title Secure ranking of a vector across all sources
-#' @description Securely generate the ranks of a numeric vector and estimate
-#' true global quantiles across all data sources simultaneously
-#' @details ds.ranksSecure is a clientside function which calls a series of
-#' other clientside and serverside functions to securely generate the global
-#' ranks of a numeric vector "V2BR" (vector to be ranked)
-#' in order to set up analyses on V2BR based on
-#' non-parametric methods, some types of survival analysis and to derive true
-#' global quantiles (such as the median, lower (25%) and upper (75%) quartiles,
-#' and the 95% and 97.5% quantiles) across all sources simultaneously. These
-#' global quantiles are, in general, different to the mean or median of the
-#' equivalent quantiles calculated independently in each data source separately.
-#' For more details about the cluster of functions that collectively
-#' enable secure global ranking and estimation of global quantiles see the
-#' associated document entitled "secure.global.ranking.docx".
-#' @param input.var.name a character string in a format that can pass through
-#' the DataSHIELD R parser which specifies the name of the vector to be ranked.
-#' Needs to have same name in each data source.
-#' @param quantiles.for.estimation one of a restricted set of character strings.
-#' To mitigate disclosure risk only the following set of quantiles can be
-#' generated: c(0.025,0.05,0.10,0.20,0.25,0.30,0.3333,0.40,0.50,0.60,0.6667,
-#' 0.70,0.75,0.80,0.90,0.95,0.975). The allowable formats for the argument
-#' are of the general form: "0.025-0.975" where the first number is the lowest
-#' quantile to be estimated and the second number is the equivalent highest
-#' quantile to estimate. These two quantiles are then estimated along with
-#' all allowable quantiles in between. The allowable argument values are then:
-#' "0.025-0.975", "0.05-0.95", "0.10-0.90", "0.20-0.80". Two alternative values
-#' are "quartiles" i.e. c(0.25,0.50,0.75), and "median" i.e. c(0.50). The
-#' default value is "0.05-0.95". If the sample size is so small that an extreme
-#' quartile could be disclosive the function will be terminated and an error
-#' message returned telling you that you might try using an argument with a
-#' narrower set of quantiles. This disclosure trap will be triggered if the
-#' total number of subjects across all studies divided by the total number
-#' of quantile values being estimated is less than or equal to nfilter.tab
-#' (the minimum cell size in a contingency table).
-#' @param generate.quantiles a logical value indicating whether the
-#' ds.ranksSecure function should carry on to estimate the key quantile
-#' values specified by argument or should stop
-#' once the global ranks have been created and written to the serverside.
-#' Default is TRUE and as the key quantiles are generally non-disclosive this
-#' is usually the setting to use. But, if there is some abnormal configuration
-#' of the clusters of values that are being ranked such that some values are
-#' treated as being missing and the processing stops, then setting
-#' generate.quantiles to FALSE allows the generation of ranks to complete so
-#' they can then be used for non-parametric analysis, even if the key values
-#' cannot be estimated. A real example of an unusual configuration was in a
-#' reasonably large dataset of survival times, where a substantial proportion
-#' of survival profiles were censored at precisely 10 years. This meant that
-#' the 97.5% percentile could not be separated from the 95% percentile and so
-#' the former was allocated the value NA. This stopped processing of the ranks
-#' which could then be enabled by setting generate.quantiles to FALSE. However,
-#' if this problem is detected an error message is returned which indicates that
-#' in some cases (as in this case in fact) the problem can be circumvented
-#' by selecting a narrow range of key quantiles to estimate. In this case, in
-#' fact, this simply required changing the argument
-#' from "0.025-0.975" to "0.05-0.95".
-#' @param output.ranks.df a character string in a format that can pass through
-#' the DataSHIELD R parser which specifies an optional name for the
-#' data.frame written to the serverside on each data source that contains
-#' 11 of the key output variables from the ranking procedure pertaining to that
-#' particular data source. This includes the global ranks and quantiles of each
-#' value of the V2BR (i.e. the values are ranked across all studies
-#' simultaneously). If no name is specified, the default name
-#' is allocated as "full.ranks.df". This data.frame contains disclosive
-#' information and cannot therefore be passed to the clientside.
-#' @param summary.output.ranks.df a character string in a format that can pass through
-#' the DataSHIELD R parser which specifies an optional name for the summary
-#' data.frame written to the serverside on each data source that contains
-#' 5 of the key output variables from the ranking procedure pertaining to that
-#' particular data source. This again includes the global ranks and quantiles of each
-#' value of the V2BR (i.e. the values are ranked across all studies
-#' simultaneously). If no name is specified, the default name
-#' is allocated as "summary.ranks.df" This data.frame contains disclosive
-#' information and cannot therefore be passed to the clientside.
-#' @param ranks.sort.by a character string taking two possible values. These
-#' are "ID.orig" and "vals.orig". These define the order in which the
-#' output.ranks.df and summary.output.ranks.df data frames are presented. If
-#' the argument is set as "ID.orig" the order of rows in the output data frames
-#' are precisely the same as the order of original input vector that is being
-#' ranked (i.e. V2BR). This means the ranks can simply be cbinded to the
-#' matrix, data frame or tibble that originally included V2BR so it also
-#' includes the corresponding ranks. If it is set as "vals.orig" the output
-#' data frames are in order of increasing magnitude of the original values of
-#' V2BR. Default value is "ID.orig".
-#' @param shared.seed.value an integer value which is used to set the
-#' random seed generator in each study. Initially, the seed is set to be the
-#' same in all studies, so the order and parameters of the repeated
-#' encryption procedures are precisely the same in each study. Then a
-#' study-specific modification of the seed in each study ensures that the
-#' procedures initially generating the masking pseudodata (which are then
-#' subject to the same encryption procedures as the real data) are different
-#' in each study. For further information about the shared seed and how we
-#' intend to transmit it in the future, please see the detailed associated
-#' header document.
-#' @param synth.real.ratio an integer value specifying the ratio between the
-#' number of masking pseudodata values generated in each study compared to
-#' the number of real data values in V2BR.
-#' @param NA.manage character string taking three possible values: "NA.delete",
-#' "NA.low","NA.hi". This argument determines how missing values are managed
-#' before ranking. "NA.delete" results in all missing values being removed
-#' prior to ranking. This means that the vector of ranks in each study is
-#' shorter than the original vector of V2BR values by an amount corresponding
-#' to the number of missing values in V2BR in that study. Any rows containing
-#' missing values in V2BR are simply removed before the ranking procedure is
-#' initiated so the order of rows without missing data is unaltered. "NA.low"
-#' indicates that all missing values should be converted to a new value that
-#' has a meaningful magnitude that is lower (more negative or less positive)
-#' than the lowest non-missing value of V2BR in any of the studies. This means,
-#' for example, that if there are a total of M values of V2BR that are missing
-#' across all studies, there will be a total of M observations that are ranked
-#' lowest each with a rank of (M+1)/2. So if 7 are missing the lowest 7 ranks
-#' will be 4,4,4,4,4,4,4 and if 4 are missing the first 4 ranks will be
-#' 2.5,2.5,2.5,2.5. "NA.hi" indicates that all missing values should be
-#' converted to a new value that has a meaningful magnitude that is higher(less
-#' negative or more positive)than the highest non-missing value of V2BR in any
-#' of the studies. This means, for example, that if there are a total of M
-#' values of V2BR that are missing across all studies and N non-missing
-#' values, there will be a total of M observations that are ranked
-#' highest each with a rank of (2N-M+1)/2. So if there are a total of 1000
-#' V2BR values and 9 are missing the highest 9 ranks will be 996, 996 ... 996.
-#' If NA.manage is either "NA.low" or "NA.hi" the final rank vector in each
-#' study will have the same length as the V2BR vector in that same study.
-#' 2.5,2.5,2.5,2.5. The default value of the "NA.manage" argument is "NA.delete"
-#' @param rm.residual.objects logical value. Default = TRUE: at the beginning
-#' and end of each run of ds.ranksSecure delete all extraneous objects that are
-#' otherwise left behind. These are not usually needed, but could be of value
-#' if one were investigating a problem with the ranking. FALSE: do not delete
-#' the residual objects
-#' @param monitor.progress logical value. Default = FALSE. If TRUE, function
-#' outputs information about its progress.
-#' @param datasources specifies the particular opal object(s) to use. If the
-#' argument is not specified (NULL) the default set of opals
-#' will be used. If is specified, it should be set without
-#' inverted commas: e.g. datasources=opals.em. If you wish to
-#' apply the function solely to e.g. the second opal server in a set of three,
-#' the argument can be specified as: e.g. datasources=opals.em[2].
-#' If you wish to specify the first and third opal servers in a set you specify:
-#' e.g. datasources=opals.em[c(1,3)].
-#' @return the data frame objects specified by the arguments output.ranks.df
-#' and summary.output.ranks.df. These are written to the serverside in each
-#' study. Provided the sort order is consistent these data frames can be cbinded
-#' to any other data frame, matrix or tibble object containing V2BR or to the
-#' V2BR vector itself, allowing the global ranks and quantiles to be
-#' analysed rather than the actual values of V2BR. The last call within
-#' the ds.ranksSecure function is to another clientside function
-#' ds.extractQuantile (for further details see header for that function).
-#' This returns an additional data frame "final.quantile.df" of which the first
-#' column is the vector of key quantiles to be estimated as specified by the
-#' argument and the second column is the list of
-#' precise values of V2BR which correspond to these key quantiles. Because
-#' the serverside functions associated with ds.ranksSecure and
-#' ds.extractQuantile block potentially disclosive output (see information
-#' for parameter quantiles.for.estimation) the "final.quantile.df" is returned
-#' to the client allowing the direct reporting of V2BR values corresponding to
-#' key quantiles such as the quartiles, the median and 95th percentile etc. In
-#' addition a copy of the same data frame is also written to the serverside in
-#' each study allowing the value of key quantiles such as the median to be
-#' incorporated directly in calculations or transformations on the serverside
-#' regardless in which study (or studies) those key quantile values have
-#' occurred.
-#' @author Paul Burton 4th November, 2021
-#' @export
-ds.ranksSecure <- function(input.var.name=NULL, quantiles.for.estimation="0.05-0.95",
- generate.quantiles=TRUE,
- output.ranks.df=NULL, summary.output.ranks.df = NULL,
- ranks.sort.by="ID.orig", shared.seed.value=10,
- synth.real.ratio=2,NA.manage="NA.delete",
- rm.residual.objects=TRUE, monitor.progress=FALSE,
- datasources=NULL){
-
- # look for DS connections
- if(is.null(datasources)){
- datasources <- datashield.connections_find()
- }
-
- datasources.in.current.function<-datasources
-
- # ensure datasources is a list of DSConnection-class
- if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
- stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
- }
-
- # check if user has provided the name of the column that holds the input variable
- if(is.null(input.var.name)){
- stop("Please provide the name of the variable to be ranked across all sources collectively e.g. 'varname'", call.=FALSE)
- }
-
- # check if user has provided the name of the input variable in a correct character format
- if(!is.character(input.var.name)){
- stop("Please provide the name of the variable that is to be converted to a factor in character format e.g. 'varname'", call.=FALSE)
- }
-
- # look for output df names and provide defaults if required
- if(is.null(output.ranks.df)){
- output.ranks.df<-"full.ranks.df"
- }
-
- if(is.null(summary.output.ranks.df)){
- summary.output.ranks.df<-"summary.ranks.df"
- }
-
- if(is.null(synth.real.ratio)){
- synth.real.ratio<-10
- }
-
-#CLEAN UP RESIDUAL OBJECTS FROM PREVIOUS RUNS OF THE FUNCTION
- if(rm.residual.objects)
- {
- #UNLESS THE IS FALSE,
- #CLEAR UP ANY UNWANTED RESIDUAL OBJECTS FROM THE
- #PREVIOUS RUNNING OF THE ds.ranksSecure FUNCTION IN THE
- #CASE THAT PREVIOUS CALL STOPPED PREMATURELY AND SO THE
- #FINAL CLEARING UP STEP WAS NOT INITIATED.
-
- rm.names<-c("blackbox.output.df", "blackbox.ranks.df",
- "global.bounds.df", "global.ranks.quantiles.df",
- "input.mean.sd.df", "input.ranks.sd.df",
- output.ranks.df, "min.max.df", "numstudies.df",
- "sR4.df", "sR5.df")
-
- #make transmittable via parser
- rm.names.transmit <- paste(rm.names,collapse=",")
-
- calltext.rm <- call("rmDS", rm.names.transmit)
-
- rm.output <- DSI::datashield.aggregate(datasources, calltext.rm)
- }
-
- if(monitor.progress){
-message("\n\nStep 1 of 8 complete:
- Cleaned up residual output from
- previous runs of ds.ranksSecure
-
-
- ")
-
-
- }
-
- #CALL AN INITIALISING SERVER SIDE FUNCTION (ASSIGN)
- #TO IDENTIFY QUANTILES OF ORIGINAL VARIABLES IN EACH STUDY
- #TO CREATE A STARTING CONFIGURATION THAT IS ALMOST CERTAINLY >1 WITH A SPAN OF 10
-
- cally0 <- paste0('quantileMeanDS(', input.var.name, ')')
- initialise.input.var <- DSI::datashield.aggregate(datasources, as.symbol(cally0))
-
-
- numstudies<-length(initialise.input.var)
- numvals<-length(initialise.input.var[[1]])
-
- q5.val<-NULL
- q95.val<-NULL
- mean.val<-NULL
-
- for(rr in 1:numstudies){
- q5.val<-c(q5.val,initialise.input.var[[rr]][1])
- q95.val<-c(q95.val,initialise.input.var[[rr]][numvals-1])
- mean.val<-c(mean.val,initialise.input.var[[rr]][numvals])
- }
-
- min.q5<-min(q5.val)
- max.q95<-max(q95.val)
-
- max.sd.input.var<-(max.q95-min.q5)/(2*1.65)
- mean.input.var<-mean(mean.val)
-
- input.mean.sd.df<-data.frame(cbind(mean.input.var,max.sd.input.var))
-
-
- #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN VALUES TO SERVERSIDE
- dsBaseClient::ds.dmtC2S(dfdata=input.mean.sd.df,newobj="input.mean.sd.df")
-
-if(monitor.progress){
-message("\n\nStep 2 of 8 complete:
- Estimated mean and sd of
- v2br to standardise initial values
-
-
- ")
- }
-
-#CALL minMaxRandDS FUNCTION (AGGREGATE) TO CREATE MIN AND MAX VALUES
-#FOR INPUT VARIABLE WITH RANDOM NOISE ON TOP. ACTUAL VALUE DOESN'T
-#MATTER AS IT IS ONLY TO ALLOCATE LOW AND HIGH VALUES TO NA WHEN
-#THEY ARE TO BE INCLUDED IN THE RANKING
-
- calltext0 <- call("minMaxRandDS",input.var.name)
- rand.min.max<-DSI::datashield.aggregate(datasources, calltext0)
-
-
- numstudies<-length(rand.min.max)
-
- rand.min.min<-NULL
- rand.max.max<-NULL
-
- for(ss in 1:numstudies){
- rand.min.min<-c(rand.min.min,rand.min.max[[ss]][1])
- rand.max.max<-c(rand.max.max,rand.min.max[[ss]][2])
- }
-
- min.min.final<-min(rand.min.min)
- max.max.final<-min(rand.max.max)
-
- min.max.df<-data.frame(cbind(min.min.final,max.max.final))
-
-#CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN VALUES TO SERVERSIDE
-dsBaseClient::ds.dmtC2S(dfdata=min.max.df,newobj="min.max.df")
-
-if(monitor.progress){
-message("\n\nStep 3 of 8 complete:
- Generated ultra max and ultra min values to allocate to
- missing values if is NA.hi or NA.low
-
-
- ")
-}
-
- #CALL THE FIRST SERVER SIDE FUNCTION (ASSIGN)
- #WRITES ENCRYPTED DATA TO SERVERSIDE OBJECT "blackbox.output.df"
- calltext1 <- call("blackBoxDS", input.var.name=input.var.name,
- #max.sd.input.var=input.mean.sd.df$max.sd.input.var,
- #mean.input.var=input.mean.sd.df$mean.input.var,
- shared.seedval=shared.seed.value,synth.real.ratio,NA.manage)
- DSI::datashield.assign(datasources, "blackbox.output.df", calltext1)
-
-if(monitor.progress){
-message("\n\nStep 4 of 8 complete:
- Pseudo data synthesised,first set of rank-consistent
- transformations complete and blackbox.output.df created
-
-
- ")
- }
-
- #CALL THE SECOND SERVER SIDE FUNCTION (AGGREGATE)
- #RETURN ENCRYPTED DATA IN "blackbox.output.df" TO CLIENTSIDE
- calltext2 <- call("ranksSecureDS1")
- blackbox.output<-DSI::datashield.aggregate(datasources, calltext2)
-
- numstudies<-length(blackbox.output)
-
- studyid<-rep(1,nrow(blackbox.output[[1]]))
-
- sR3.df<-data.frame(cbind(blackbox.output[[1]],studyid))
-
-
- if(numstudies>=1)
- {
- for(ss in 2:numstudies)
- {
- studyid<-rep(ss,nrow(blackbox.output[[ss]]))
-
- temp.df<-data.frame(cbind(blackbox.output[[ss]],studyid))
- sR3.df<-rbind(sR3.df,temp.df)
- }
- }
- colnames(sR3.df)<-c(colnames(blackbox.output[[1]]),"studyid")
-
- ord.global.val<-order(sR3.df$encrypted.var)
- sR3.df<-sR3.df[ord.global.val,]
- global.rank<-rank(sR3.df$encrypted.var)
- sR3.sort.global.val.df<-data.frame(cbind(sR3.df,global.rank))
-
-
- #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN df TO SERVERSIDE
- for(ss in 1:3)
- {
- sR4.df<-sR3.sort.global.val.df[sR3.sort.global.val.df$studyid==ss,]
- dsBaseClient::ds.dmtC2S(dfdata=sR4.df,newobj="sR4.df",
- datasources = datasources.in.current.function[ss])
- }
-
- numstudies.df<-data.frame(numstudies)
-
- #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN numstudies TO SERVERSIDE
- dsBaseClient::ds.dmtC2S(dfdata=numstudies.df,newobj="numstudies.df",
- datasources = datasources.in.current.function)
-
-
- #CALL THE THIRD SERVER SIDE FUNCTION (ASSIGN)
- #SELECTS ENCRYPTED DATA FOR REAL SUBJECTS IN EACH
- #STUDY SPECIFIC sR4.df AND WRITES AS sR5.df ON SERVERSIDE
- calltext3 <- call("ranksSecureDS2")
- DSI::datashield.assign(datasources,"sR5.df",calltext3)
-
- ds.make("sR5.df$global.rank","testvar.ranks")
-
-if(monitor.progress){
- message("\n\nStep 5 of 8 complete:
- Global ranks generated and pseudodata stripped out. Now ready
- to proceed to transformation of global ranks
-
-
- ")
- }
-
- input.ranks.name<-"testvar.ranks"
-
- cally2 <- paste0('quantileMeanDS(', input.ranks.name, ')')
- initialise.input.ranks <- DSI::datashield.aggregate(datasources, as.symbol(cally2))
-
-
- numstudies<-length(initialise.input.ranks)
- numvals<-length(initialise.input.ranks[[1]])
-
- q5.val<-NULL
- q95.val<-NULL
- mean.ranks<-NULL
-
- for(rr in 1:numstudies){
- q5.val<-c(q5.val,initialise.input.ranks[[rr]][1])
- q95.val<-c(q95.val,initialise.input.ranks[[rr]][numvals-1])
- mean.ranks<-c(mean.ranks,initialise.input.ranks[[rr]][numvals])
- }
-
- min.q5<-min(q5.val)
- max.q95<-max(q95.val)
-
- max.sd.input.ranks<-(max.q95-min.q5)/(2*1.65)
- mean.input.ranks<-mean(mean.ranks)
-
- input.ranks.sd.df<-data.frame(cbind(mean.input.ranks,max.sd.input.ranks))
-
-
- #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN VALUES TO SERVERSIDE
- dsBaseClient::ds.dmtC2S(dfdata=input.ranks.sd.df,newobj="input.ranks.sd.df")
-
-
-
- #CALLS FOURTH SERVER SIDE FUNCTION (ASSIGN)
- #THAT IS A MODIFIED VERSION OF blackBoxDS THAT
- #ENCRYPTS JUST THE RANKS OF THE REAL DATA AND WRITES
- #TO blackbox.ranks.df ON THE SERVERSIDE
- #THIS VERSION (blackBoxDS2) CREATES NO SYNTHETIC DATA TO
- #CONCEAL VALUES
-
-
- calltext4 <- call("blackBoxRanksDS","testvar.ranks",
- shared.seedval=shared.seed.value)
-
- DSI::datashield.assign(datasources, "blackbox.ranks.df", calltext4)
-
-if(monitor.progress){
- message("\n\nStep 6 of 8 complete:
- Rank-consistent transformations of global ranks complete
- and blackbox.ranks.df created
-
-
- ")
- }
-
-
-
- #CALL THE FIFTH SERVER SIDE FUNCTION (AGGREGATE)
- #SEND NON-DISCLOSIVE ELEMENTS OF (ENCRYPTED) DATA IN "blackbox.ranks.df"
- #TO CLIENTSIDE
-
- calltext5 <- call("ranksSecureDS3")
- blackbox.ranks.output<-DSI::datashield.aggregate(datasources, calltext5)
-
- numstudies<-length(blackbox.ranks.output)
-
- sR6.df<-blackbox.ranks.output[[1]]
-
-
- if(numstudies>=1)
- {
- for(ss in 2:numstudies)
- {
- sR6.df<-rbind(sR6.df,blackbox.ranks.output[[ss]])
- }
- }
- sR6.df<-data.frame(sR6.df)
- colnames(sR6.df)<-c(colnames(blackbox.ranks.output[[1]]))
-
-
- #Rank encrypted ranks across all studies
- real.ranks.global<-rank(sR6.df$encrypted.ranks)
- real.quantiles.global<-real.ranks.global/length(real.ranks.global)
- sR7.df<-cbind(sR6.df,real.ranks.global,real.quantiles.global)
- ord.by.real.ranks.global<-order(sR7.df$real.ranks.global)
- sR7.df.by.real.ranks.global<-sR7.df[ord.by.real.ranks.global,]
-
-
-
- #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN sR7.df TO SERVERSIDE
- for(ss in 1:3)
- {
- sR7.df.study.specific<-sR7.df.by.real.ranks.global[sR7.df.by.real.ranks.global$studyid==ss,]
- dsBaseClient::ds.dmtC2S(dfdata=sR7.df.study.specific,newobj="global.ranks.quantiles.df",
- datasources = datasources.in.current.function[ss])
- }
-
-
-
-
- #CALL THE SIXTH SERVER SIDE FUNCTION (ASSIGN)
- #TAKE ALLOCATED GLOBAL RANKS FROM sR7.df APPEND TO blackbox.ranks.df
- #TO CREATE sR9.df
-
- calltext6 <- call("ranksSecureDS4",ranks.sort.by)
- DSI::datashield.assign(datasources,output.ranks.df,calltext6)
-
-
- calltext7 <- call("ranksSecureDS5", output.ranks.df)
- DSI::datashield.assign(datasources,summary.output.ranks.df, calltext7)
-
- if(monitor.progress){
- message("\n\nStep 7 of 8 complete:
- Final global ranking of values in v2br complete and
- written to each serverside as appropriate
-
-
- ",summary.output.ranks.df)
- }
-
-
-
- #CLEAN UP UNWANTED RESIDUAL OBJECTS FROM THE RUNNING OF ds.ranksSecure
- #EXCEPT FOR OBJECTS CREATED BY ds.extractQuantiles
-
- if(rm.residual.objects)
- {
- #UNLESS THE IS FALSE,
- #CLEAR UP ANY UNWANTED RESIDUAL OBJECTS
-
- rm.names.rS<-c("blackbox.output.df", "blackbox.ranks.df",
- "global.ranks.quantiles.df","input.mean.sd.df", "input.ranks.sd.df",
- output.ranks.df, "min.max.df", "numstudies.df",
- "sR4.df", "sR5.df")
-
- #make transmittable via parser
- rm.names.rS.transmit <- paste(rm.names.rS,collapse=",")
-
- calltext.rm.rS <- call("rmDS", rm.names.rS.transmit)
-
-# rm.output.rS <-
- DSI::datashield.aggregate(datasources, calltext.rm.rS)
-
- }
-
-if(monitor.progress && rm.residual.objects){
- message("\n\nStep 8 of 8 complete:
- Cleaned up residual output from running ds.ranksSecure
-
-
- ")
- }
-
- if(monitor.progress && !rm.residual.objects){
- message("\n\nStep 8 of 8 complete:
- Residual output from running ds.ranksSecure NOT deleted
-
-
- ")
- }
-
-
-
-if(!generate.quantiles){
- message("\n\n\n"," FINAL RANKING PROCEDURES COMPLETE:
- PRIMARY RANKING OUTPUT IS IN DATA FRAME",summary.output.ranks.df,
- "
- WHICH IS SORTED BY",ranks.sort.by," AND HAS BEEN
- WRITTEN TO THE SERVERSIDE\n\n\n\n")
-
- info.message<-"As the argument was set to FALSE no quantiles have been estimated.Please set argument to TRUE if you want to estimate quantiles such as median, quartiles and 90th percentile"
- message("\n\n",info.message,"\n\n")
- return(info.message)
- }
-
-final.quantile.df<-
- ds.extractQuantiles(
- quantiles.for.estimation,
- summary.output.ranks.df,
- ranks.sort.by,
- rm.residual.objects,
- extract.datasources=NULL)
-
- return(final.quantile.df)
-}
-
-##########################################
-#ds.ranksSecure
diff --git a/armadillo_azure-pipelines.yml b/armadillo_azure-pipelines.yml
index 4ff1f497..480c55f5 100644
--- a/armadillo_azure-pipelines.yml
+++ b/armadillo_azure-pipelines.yml
@@ -58,10 +58,10 @@ schedules:
- master
always: true
- cron: "0 2 * * *"
- displayName: Nightly build - v6.3.5-dev
+ displayName: Nightly build - v7.0-dev-feat/performance
branches:
include:
- - v6.3.5-dev
+ - v7.0-dev-feat/performance
always: true
#########################################################################################
@@ -235,7 +235,7 @@ jobs:
curl -u admin:admin -X GET http://localhost:8080/packages
- curl -u admin:admin --max-time 300 -v -H 'Content-Type: multipart/form-data' -F "file=@dsBase_6.3.5-permissive.tar.gz" -X POST http://localhost:8080/install-package
+ curl -u admin:admin --max-time 300 -v -H 'Content-Type: multipart/form-data' -F "file=@dsBase_7.0-dev-feat_performance-permissive.tar.gz" -X POST http://localhost:8080/install-package
sleep 60
docker container restart dsbaseclient_armadillo_1
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index db3d7a18..a6e17a48 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -44,10 +44,10 @@ schedules:
- master
always: true
- cron: "0 2 * * *"
- displayName: Nightly build - v6.3.5-dev
+ displayName: Nightly build - v7.0-dev-feat/performance
branches:
include:
- - v6.3.5-dev
+ - v7.0-dev-feat/performance
always: true
#########################################################################################
@@ -216,7 +216,7 @@ jobs:
- bash: |
R -q -e "library(opalr); opal <- opal.login(username = 'administrator', password = 'datashield_test&', url = 'https://localhost:8443', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); opal.put(opal, 'system', 'conf', 'general', '_rPackage'); opal.logout(o)"
- R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = 'v6.3.5-dev'); opal.logout(opal)"
+ R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = 'v7.0-dev-feat/performance'); opal.logout(opal)"
sleep 60
diff --git a/docs/reference/ds.colnames.html b/docs/reference/ds.colnames.html
index 4e90a603..196ee293 100644
--- a/docs/reference/ds.colnames.html
+++ b/docs/reference/ds.colnames.html
@@ -94,28 +94,28 @@
Examples
require('dsBaseClient')builder<-DSI::newDSLoginBuilder()
-builder$append(server ="study1",
- url ="http://192.168.56.100:8080/",
- user ="administrator", password ="datashield_test&",
+builder$append(server ="study1",
+ url ="http://192.168.56.100:8080/",
+ user ="administrator", password ="datashield_test&", table ="CNSIM.CNSIM1", driver ="OpalDriver")
-builder$append(server ="study2",
- url ="http://192.168.56.100:8080/",
- user ="administrator", password ="datashield_test&",
+builder$append(server ="study2",
+ url ="http://192.168.56.100:8080/",
+ user ="administrator", password ="datashield_test&", table ="CNSIM.CNSIM2", driver ="OpalDriver")builder$append(server ="study3",
- url ="http://192.168.56.100:8080/",
- user ="administrator", password ="datashield_test&",
+ url ="http://192.168.56.100:8080/",
+ user ="administrator", password ="datashield_test&", table ="CNSIM.CNSIM3", driver ="OpalDriver")logindata<-builder$build()
-
+# Log onto the remote Opal training servers
-connections<-DSI::datashield.login(logins =logindata, assign =TRUE, symbol ="D")
+connections<-DSI::datashield.login(logins =logindata, assign =TRUE, symbol ="D")# Getting column names of the R objects stored in the server-sideds.colnames(x ="D", datasources =connections[1])#only the first server ("study1") is used# Clear the Datashield R sessions and logout
-datashield.logout(connections)
+datashield.logout(connections)}# }
diff --git a/docs/reference/ds.ranksSecure.html b/docs/reference/ds.ranksSecure.html
deleted file mode 100644
index 346d484a..00000000
--- a/docs/reference/ds.ranksSecure.html
+++ /dev/null
@@ -1,296 +0,0 @@
-
-Secure ranking of a vector across all sources — ds.ranksSecure • dsBaseClient
-
-
-
a character string in a format that can pass through
-the DataSHIELD R parser which specifies the name of the vector to be ranked.
-Needs to have same name in each data source.
-
-
-
quantiles.for.estimation
-
one of a restricted set of character strings.
-To mitigate disclosure risk only the following set of quantiles can be
-generated: c(0.025,0.05,0.10,0.20,0.25,0.30,0.3333,0.40,0.50,0.60,0.6667,
-0.70,0.75,0.80,0.90,0.95,0.975). The allowable formats for the argument
-are of the general form: "0.025-0.975" where the first number is the lowest
-quantile to be estimated and the second number is the equivalent highest
-quantile to estimate. These two quantiles are then estimated along with
-all allowable quantiles in between. The allowable argument values are then:
-"0.025-0.975", "0.05-0.95", "0.10-0.90", "0.20-0.80". Two alternative values
-are "quartiles" i.e. c(0.25,0.50,0.75), and "median" i.e. c(0.50). The
-default value is "0.05-0.95". If the sample size is so small that an extreme
-quartile could be disclosive the function will be terminated and an error
-message returned telling you that you might try using an argument with a
-narrower set of quantiles. This disclosure trap will be triggered if the
-total number of subjects across all studies divided by the total number
-of quantile values being estimated is less than or equal to nfilter.tab
-(the minimum cell size in a contingency table).
-
-
-
generate.quantiles
-
a logical value indicating whether the
-ds.ranksSecure function should carry on to estimate the key quantile
-values specified by argument <quantiles.for.estimation> or should stop
-once the global ranks have been created and written to the serverside.
-Default is TRUE and as the key quantiles are generally non-disclosive this
-is usually the setting to use. But, if there is some abnormal configuration
-of the clusters of values that are being ranked such that some values are
-treated as being missing and the processing stops, then setting
-generate.quantiles to FALSE allows the generation of ranks to complete so
-they can then be used for non-parametric analysis, even if the key values
-cannot be estimated. A real example of an unusual configuration was in a
-reasonably large dataset of survival times, where a substantial proportion
-of survival profiles were censored at precisely 10 years. This meant that
-the 97.5
-the former was allocated the value NA. This stopped processing of the ranks
-which could then be enabled by setting generate.quantiles to FALSE. However,
-if this problem is detected an error message is returned which indicates that
-in some cases (as in this case in fact) the problem can be circumvented
-by selecting a narrow range of key quantiles to estimate. In this case, in
-fact, this simply required changing the <quantiles.for.estimation> argument
-from "0.025-0.975" to "0.05-0.95".
-
-
-
output.ranks.df
-
a character string in a format that can pass through
-the DataSHIELD R parser which specifies an optional name for the
-data.frame written to the serverside on each data source that contains
-11 of the key output variables from the ranking procedure pertaining to that
-particular data source. This includes the global ranks and quantiles of each
-value of the V2BR (i.e. the values are ranked across all studies
-simultaneously). If no name is specified, the default name
-is allocated as "full.ranks.df". This data.frame contains disclosive
-information and cannot therefore be passed to the clientside.
-
-
-
summary.output.ranks.df
-
a character string in a format that can pass through
-the DataSHIELD R parser which specifies an optional name for the summary
-data.frame written to the serverside on each data source that contains
-5 of the key output variables from the ranking procedure pertaining to that
-particular data source. This again includes the global ranks and quantiles of each
-value of the V2BR (i.e. the values are ranked across all studies
-simultaneously). If no name is specified, the default name
-is allocated as "summary.ranks.df" This data.frame contains disclosive
-information and cannot therefore be passed to the clientside.
-
-
-
ranks.sort.by
-
a character string taking two possible values. These
-are "ID.orig" and "vals.orig". These define the order in which the
-output.ranks.df and summary.output.ranks.df data frames are presented. If
-the argument is set as "ID.orig" the order of rows in the output data frames
-are precisely the same as the order of original input vector that is being
-ranked (i.e. V2BR). This means the ranks can simply be cbinded to the
-matrix, data frame or tibble that originally included V2BR so it also
-includes the corresponding ranks. If it is set as "vals.orig" the output
-data frames are in order of increasing magnitude of the original values of
-V2BR. Default value is "ID.orig".
-
-
-
shared.seed.value
-
an integer value which is used to set the
-random seed generator in each study. Initially, the seed is set to be the
-same in all studies, so the order and parameters of the repeated
-encryption procedures are precisely the same in each study. Then a
-study-specific modification of the seed in each study ensures that the
-procedures initially generating the masking pseudodata (which are then
-subject to the same encryption procedures as the real data) are different
-in each study. For further information about the shared seed and how we
-intend to transmit it in the future, please see the detailed associated
-header document.
-
-
-
synth.real.ratio
-
an integer value specifying the ratio between the
-number of masking pseudodata values generated in each study compared to
-the number of real data values in V2BR.
-
-
-
NA.manage
-
character string taking three possible values: "NA.delete",
-"NA.low","NA.hi". This argument determines how missing values are managed
-before ranking. "NA.delete" results in all missing values being removed
-prior to ranking. This means that the vector of ranks in each study is
-shorter than the original vector of V2BR values by an amount corresponding
-to the number of missing values in V2BR in that study. Any rows containing
-missing values in V2BR are simply removed before the ranking procedure is
-initiated so the order of rows without missing data is unaltered. "NA.low"
-indicates that all missing values should be converted to a new value that
-has a meaningful magnitude that is lower (more negative or less positive)
-than the lowest non-missing value of V2BR in any of the studies. This means,
-for example, that if there are a total of M values of V2BR that are missing
-across all studies, there will be a total of M observations that are ranked
-lowest each with a rank of (M+1)/2. So if 7 are missing the lowest 7 ranks
-will be 4,4,4,4,4,4,4 and if 4 are missing the first 4 ranks will be
-2.5,2.5,2.5,2.5. "NA.hi" indicates that all missing values should be
-converted to a new value that has a meaningful magnitude that is higher(less
-negative or more positive)than the highest non-missing value of V2BR in any
-of the studies. This means, for example, that if there are a total of M
-values of V2BR that are missing across all studies and N non-missing
-values, there will be a total of M observations that are ranked
-highest each with a rank of (2N-M+1)/2. So if there are a total of 1000
-V2BR values and 9 are missing the highest 9 ranks will be 996, 996 ... 996.
-If NA.manage is either "NA.low" or "NA.hi" the final rank vector in each
-study will have the same length as the V2BR vector in that same study.
-2.5,2.5,2.5,2.5. The default value of the "NA.manage" argument is "NA.delete"
-
-
-
rm.residual.objects
-
logical value. Default = TRUE: at the beginning
-and end of each run of ds.ranksSecure delete all extraneous objects that are
-otherwise left behind. These are not usually needed, but could be of value
-if one were investigating a problem with the ranking. FALSE: do not delete
-the residual objects
-
-
-
monitor.progress
-
logical value. Default = FALSE. If TRUE, function
-outputs information about its progress.
-
-
-
datasources
-
specifies the particular opal object(s) to use. If the
-<datasources> argument is not specified (NULL) the default set of opals
-will be used. If <datasources> is specified, it should be set without
-inverted commas: e.g. datasources=opals.em. If you wish to
-apply the function solely to e.g. the second opal server in a set of three,
-the argument can be specified as: e.g. datasources=opals.em[2].
-If you wish to specify the first and third opal servers in a set you specify:
-e.g. datasources=opals.em[c(1,3)].
-
-
-
-
Value
-
the data frame objects specified by the arguments output.ranks.df
-and summary.output.ranks.df. These are written to the serverside in each
-study. Provided the sort order is consistent these data frames can be cbinded
-to any other data frame, matrix or tibble object containing V2BR or to the
-V2BR vector itself, allowing the global ranks and quantiles to be
-analysed rather than the actual values of V2BR. The last call within
-the ds.ranksSecure function is to another clientside function
-ds.extractQuantile (for further details see header for that function).
-This returns an additional data frame "final.quantile.df" of which the first
-column is the vector of key quantiles to be estimated as specified by the
-argument <quantiles.for.estimation> and the second column is the list of
-precise values of V2BR which correspond to these key quantiles. Because
-the serverside functions associated with ds.ranksSecure and
-ds.extractQuantile block potentially disclosive output (see information
-for parameter quantiles.for.estimation) the "final.quantile.df" is returned
-to the client allowing the direct reporting of V2BR values corresponding to
-key quantiles such as the quartiles, the median and 95th percentile etc. In
-addition a copy of the same data frame is also written to the serverside in
-each study allowing the value of key quantiles such as the median to be
-incorporated directly in calculations or transformations on the serverside
-regardless in which study (or studies) those key quantile values have
-occurred.
-
-
-
Details
-
ds.ranksSecure is a clientside function which calls a series of
-other clientside and serverside functions to securely generate the global
-ranks of a numeric vector "V2BR" (vector to be ranked)
-in order to set up analyses on V2BR based on
-non-parametric methods, some types of survival analysis and to derive true
-global quantiles (such as the median, lower (25
-and the 95
-global quantiles are, in general, different to the mean or median of the
-equivalent quantiles calculated independently in each data source separately.
-For more details about the cluster of functions that collectively
-enable secure global ranking and estimation of global quantiles see the
-associated document entitled "secure.global.ranking.docx".
diff --git a/dsBase_6.3.5-permissive.tar.gz b/dsBase_7.0-dev-feat_performance-permissive.tar.gz
similarity index 56%
rename from dsBase_6.3.5-permissive.tar.gz
rename to dsBase_7.0-dev-feat_performance-permissive.tar.gz
index 3703315d..1fca22d0 100644
Binary files a/dsBase_6.3.5-permissive.tar.gz and b/dsBase_7.0-dev-feat_performance-permissive.tar.gz differ
diff --git a/dsBase_6.3.5.tar.gz b/dsBase_7.0-dev-feat_performance.tar.gz
similarity index 53%
rename from dsBase_6.3.5.tar.gz
rename to dsBase_7.0-dev-feat_performance.tar.gz
index 0ea538d0..43e1ea28 100644
Binary files a/dsBase_6.3.5.tar.gz and b/dsBase_7.0-dev-feat_performance.tar.gz differ
diff --git a/man/ds.colnames.Rd b/man/ds.colnames.Rd
index 9460a567..e7391081 100644
--- a/man/ds.colnames.Rd
+++ b/man/ds.colnames.Rd
@@ -9,20 +9,20 @@ ds.colnames(x = NULL, datasources = NULL)
\arguments{
\item{x}{a character string providing the name of the input data frame or matrix.}
-\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
+\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
If the \code{datasources} argument is not specified
the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.}
}
\value{
-\code{ds.colnames} returns the column names of
+\code{ds.colnames} returns the column names of
the specified server-side data frame or matrix.
}
\description{
-Retrieves column names of an R object on the server-side.
+Retrieves column names of an R object on the server-side.
This function is similar to R function \code{colnames}.
}
\details{
-The input is restricted to the object of type \code{data.frame} or \code{matrix}.
+The input is restricted to the object of type \code{data.frame} or \code{matrix}.
Server function called: \code{colnamesDS}
}
@@ -37,28 +37,28 @@ Server function called: \code{colnamesDS}
require('dsBaseClient')
builder <- DSI::newDSLoginBuilder()
- builder$append(server = "study1",
- url = "http://192.168.56.100:8080/",
- user = "administrator", password = "datashield_test&",
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
table = "CNSIM.CNSIM1", driver = "OpalDriver")
- builder$append(server = "study2",
- url = "http://192.168.56.100:8080/",
- user = "administrator", password = "datashield_test&",
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
table = "CNSIM.CNSIM2", driver = "OpalDriver")
builder$append(server = "study3",
- url = "http://192.168.56.100:8080/",
- user = "administrator", password = "datashield_test&",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
table = "CNSIM.CNSIM3", driver = "OpalDriver")
logindata <- builder$build()
-
+
# Log onto the remote Opal training servers
- connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
# Getting column names of the R objects stored in the server-side
ds.colnames(x = "D",
datasources = connections[1]) #only the first server ("study1") is used
# Clear the Datashield R sessions and logout
- datashield.logout(connections)
+ datashield.logout(connections)
}
}
\seealso{
diff --git a/man/ds.ranksSecure.Rd b/man/ds.ranksSecure.Rd
deleted file mode 100644
index 294a754f..00000000
--- a/man/ds.ranksSecure.Rd
+++ /dev/null
@@ -1,202 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ds.ranksSecure.R
-\name{ds.ranksSecure}
-\alias{ds.ranksSecure}
-\title{Secure ranking of a vector across all sources}
-\usage{
-ds.ranksSecure(
- input.var.name = NULL,
- quantiles.for.estimation = "0.05-0.95",
- generate.quantiles = TRUE,
- output.ranks.df = NULL,
- summary.output.ranks.df = NULL,
- ranks.sort.by = "ID.orig",
- shared.seed.value = 10,
- synth.real.ratio = 2,
- NA.manage = "NA.delete",
- rm.residual.objects = TRUE,
- monitor.progress = FALSE,
- datasources = NULL
-)
-}
-\arguments{
-\item{input.var.name}{a character string in a format that can pass through
-the DataSHIELD R parser which specifies the name of the vector to be ranked.
-Needs to have same name in each data source.}
-
-\item{quantiles.for.estimation}{one of a restricted set of character strings.
-To mitigate disclosure risk only the following set of quantiles can be
-generated: c(0.025,0.05,0.10,0.20,0.25,0.30,0.3333,0.40,0.50,0.60,0.6667,
-0.70,0.75,0.80,0.90,0.95,0.975). The allowable formats for the argument
-are of the general form: "0.025-0.975" where the first number is the lowest
-quantile to be estimated and the second number is the equivalent highest
-quantile to estimate. These two quantiles are then estimated along with
-all allowable quantiles in between. The allowable argument values are then:
-"0.025-0.975", "0.05-0.95", "0.10-0.90", "0.20-0.80". Two alternative values
-are "quartiles" i.e. c(0.25,0.50,0.75), and "median" i.e. c(0.50). The
-default value is "0.05-0.95". If the sample size is so small that an extreme
-quartile could be disclosive the function will be terminated and an error
-message returned telling you that you might try using an argument with a
-narrower set of quantiles. This disclosure trap will be triggered if the
-total number of subjects across all studies divided by the total number
-of quantile values being estimated is less than or equal to nfilter.tab
-(the minimum cell size in a contingency table).}
-
-\item{generate.quantiles}{a logical value indicating whether the
-ds.ranksSecure function should carry on to estimate the key quantile
-values specified by argument or should stop
-once the global ranks have been created and written to the serverside.
-Default is TRUE and as the key quantiles are generally non-disclosive this
-is usually the setting to use. But, if there is some abnormal configuration
-of the clusters of values that are being ranked such that some values are
-treated as being missing and the processing stops, then setting
-generate.quantiles to FALSE allows the generation of ranks to complete so
-they can then be used for non-parametric analysis, even if the key values
-cannot be estimated. A real example of an unusual configuration was in a
-reasonably large dataset of survival times, where a substantial proportion
-of survival profiles were censored at precisely 10 years. This meant that
-the 97.5% percentile could not be separated from the 95% percentile and so
-the former was allocated the value NA. This stopped processing of the ranks
-which could then be enabled by setting generate.quantiles to FALSE. However,
-if this problem is detected an error message is returned which indicates that
-in some cases (as in this case in fact) the problem can be circumvented
-by selecting a narrow range of key quantiles to estimate. In this case, in
-fact, this simply required changing the argument
-from "0.025-0.975" to "0.05-0.95".}
-
-\item{output.ranks.df}{a character string in a format that can pass through
-the DataSHIELD R parser which specifies an optional name for the
-data.frame written to the serverside on each data source that contains
-11 of the key output variables from the ranking procedure pertaining to that
-particular data source. This includes the global ranks and quantiles of each
-value of the V2BR (i.e. the values are ranked across all studies
-simultaneously). If no name is specified, the default name
-is allocated as "full.ranks.df". This data.frame contains disclosive
-information and cannot therefore be passed to the clientside.}
-
-\item{summary.output.ranks.df}{a character string in a format that can pass through
-the DataSHIELD R parser which specifies an optional name for the summary
-data.frame written to the serverside on each data source that contains
-5 of the key output variables from the ranking procedure pertaining to that
-particular data source. This again includes the global ranks and quantiles of each
-value of the V2BR (i.e. the values are ranked across all studies
-simultaneously). If no name is specified, the default name
-is allocated as "summary.ranks.df" This data.frame contains disclosive
-information and cannot therefore be passed to the clientside.}
-
-\item{ranks.sort.by}{a character string taking two possible values. These
-are "ID.orig" and "vals.orig". These define the order in which the
-output.ranks.df and summary.output.ranks.df data frames are presented. If
-the argument is set as "ID.orig" the order of rows in the output data frames
-are precisely the same as the order of original input vector that is being
-ranked (i.e. V2BR). This means the ranks can simply be cbinded to the
-matrix, data frame or tibble that originally included V2BR so it also
-includes the corresponding ranks. If it is set as "vals.orig" the output
-data frames are in order of increasing magnitude of the original values of
-V2BR. Default value is "ID.orig".}
-
-\item{shared.seed.value}{an integer value which is used to set the
-random seed generator in each study. Initially, the seed is set to be the
-same in all studies, so the order and parameters of the repeated
-encryption procedures are precisely the same in each study. Then a
-study-specific modification of the seed in each study ensures that the
-procedures initially generating the masking pseudodata (which are then
-subject to the same encryption procedures as the real data) are different
-in each study. For further information about the shared seed and how we
-intend to transmit it in the future, please see the detailed associated
-header document.}
-
-\item{synth.real.ratio}{an integer value specifying the ratio between the
-number of masking pseudodata values generated in each study compared to
-the number of real data values in V2BR.}
-
-\item{NA.manage}{character string taking three possible values: "NA.delete",
-"NA.low","NA.hi". This argument determines how missing values are managed
-before ranking. "NA.delete" results in all missing values being removed
-prior to ranking. This means that the vector of ranks in each study is
-shorter than the original vector of V2BR values by an amount corresponding
-to the number of missing values in V2BR in that study. Any rows containing
-missing values in V2BR are simply removed before the ranking procedure is
-initiated so the order of rows without missing data is unaltered. "NA.low"
-indicates that all missing values should be converted to a new value that
-has a meaningful magnitude that is lower (more negative or less positive)
-than the lowest non-missing value of V2BR in any of the studies. This means,
-for example, that if there are a total of M values of V2BR that are missing
-across all studies, there will be a total of M observations that are ranked
-lowest each with a rank of (M+1)/2. So if 7 are missing the lowest 7 ranks
-will be 4,4,4,4,4,4,4 and if 4 are missing the first 4 ranks will be
-2.5,2.5,2.5,2.5. "NA.hi" indicates that all missing values should be
-converted to a new value that has a meaningful magnitude that is higher(less
-negative or more positive)than the highest non-missing value of V2BR in any
-of the studies. This means, for example, that if there are a total of M
-values of V2BR that are missing across all studies and N non-missing
-values, there will be a total of M observations that are ranked
-highest each with a rank of (2N-M+1)/2. So if there are a total of 1000
-V2BR values and 9 are missing the highest 9 ranks will be 996, 996 ... 996.
-If NA.manage is either "NA.low" or "NA.hi" the final rank vector in each
-study will have the same length as the V2BR vector in that same study.
-2.5,2.5,2.5,2.5. The default value of the "NA.manage" argument is "NA.delete"}
-
-\item{rm.residual.objects}{logical value. Default = TRUE: at the beginning
-and end of each run of ds.ranksSecure delete all extraneous objects that are
-otherwise left behind. These are not usually needed, but could be of value
-if one were investigating a problem with the ranking. FALSE: do not delete
-the residual objects}
-
-\item{monitor.progress}{logical value. Default = FALSE. If TRUE, function
-outputs information about its progress.}
-
-\item{datasources}{specifies the particular opal object(s) to use. If the
- argument is not specified (NULL) the default set of opals
-will be used. If is specified, it should be set without
-inverted commas: e.g. datasources=opals.em. If you wish to
-apply the function solely to e.g. the second opal server in a set of three,
-the argument can be specified as: e.g. datasources=opals.em[2].
-If you wish to specify the first and third opal servers in a set you specify:
-e.g. datasources=opals.em[c(1,3)].}
-}
-\value{
-the data frame objects specified by the arguments output.ranks.df
-and summary.output.ranks.df. These are written to the serverside in each
-study. Provided the sort order is consistent these data frames can be cbinded
-to any other data frame, matrix or tibble object containing V2BR or to the
-V2BR vector itself, allowing the global ranks and quantiles to be
-analysed rather than the actual values of V2BR. The last call within
-the ds.ranksSecure function is to another clientside function
-ds.extractQuantile (for further details see header for that function).
-This returns an additional data frame "final.quantile.df" of which the first
-column is the vector of key quantiles to be estimated as specified by the
-argument and the second column is the list of
-precise values of V2BR which correspond to these key quantiles. Because
-the serverside functions associated with ds.ranksSecure and
-ds.extractQuantile block potentially disclosive output (see information
-for parameter quantiles.for.estimation) the "final.quantile.df" is returned
-to the client allowing the direct reporting of V2BR values corresponding to
-key quantiles such as the quartiles, the median and 95th percentile etc. In
-addition a copy of the same data frame is also written to the serverside in
-each study allowing the value of key quantiles such as the median to be
-incorporated directly in calculations or transformations on the serverside
-regardless in which study (or studies) those key quantile values have
-occurred.
-}
-\description{
-Securely generate the ranks of a numeric vector and estimate
-true global quantiles across all data sources simultaneously
-}
-\details{
-ds.ranksSecure is a clientside function which calls a series of
-other clientside and serverside functions to securely generate the global
-ranks of a numeric vector "V2BR" (vector to be ranked)
-in order to set up analyses on V2BR based on
-non-parametric methods, some types of survival analysis and to derive true
-global quantiles (such as the median, lower (25%) and upper (75%) quartiles,
-and the 95% and 97.5% quantiles) across all sources simultaneously. These
-global quantiles are, in general, different to the mean or median of the
-equivalent quantiles calculated independently in each data source separately.
-For more details about the cluster of functions that collectively
-enable secure global ranking and estimation of global quantiles see the
-associated document entitled "secure.global.ranking.docx".
-}
-\author{
-Paul Burton 4th November, 2021
-}
diff --git a/opal_azure-pipelines.yml b/opal_azure-pipelines.yml
index dab96a79..a9410f69 100644
--- a/opal_azure-pipelines.yml
+++ b/opal_azure-pipelines.yml
@@ -58,10 +58,10 @@ schedules:
- master
always: true
- cron: "0 2 * * *"
- displayName: Nightly build - v6.3.5-dev
+ displayName: Nightly build - v7.0-dev-feat/performance
branches:
include:
- - v6.3.5-dev
+ - v7.0-dev-feat/performance
always: true
#########################################################################################
@@ -235,7 +235,7 @@ jobs:
- bash: |
R -q -e "library(opalr); opal <- opal.login(username = 'administrator', password = 'datashield_test&', url = 'https://localhost:8443', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); opal.put(opal, 'system', 'conf', 'general', '_rPackage'); opal.logout(o)"
- R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = 'v6.3.5-dev'); opal.logout(opal)"
+ R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = '7.0-dev-feat/performance'); opal.logout(opal)"
sleep 60
diff --git a/tests/testthat/perf_files/armadillo_azure-pipeline.csv b/tests/testthat/perf_files/armadillo_azure-pipeline.csv
index e913cff9..03d36d8f 100644
--- a/tests/testthat/perf_files/armadillo_azure-pipeline.csv
+++ b/tests/testthat/perf_files/armadillo_azure-pipeline.csv
@@ -1,14 +1,14 @@
"refer_name","rate","lower_tolerance","upper_tolerance"
-"conndisconn::perf::simple0","0.1225","0.5","2"
-"ds.abs::perf::0","5.820","0.5","2"
-"ds.asInteger::perf:0","5.328","0.5","2"
-"ds.asList::perf:0","11.71","0.5","2"
-"ds.asNumeric::perf:0","5.268","0.5","2"
-"ds.assign::perf::0","9.229","0.5","2"
-"ds.class::perf::combine:0","10.84","0.5","2"
-"ds.colnames::perf:0","7.993","0.5","2"
-"ds.exists::perf::combine:0","21.68","0.5","2"
-"ds.length::perf::combine:0","21.84","0.5","2"
-"ds.mean::perf::combine:0","22.01","0.5","2"
-"ds.mean::perf::split:0","22.22","0.5","2"
-"void::perf::void::0","48520.0","0.5","2"
+"conndisconn::perf::simple0","0.1651","0.5","2"
+"ds.abs::perf::0","6.273","0.5","2"
+"ds.asInteger::perf:0","5.731","0.5","2"
+"ds.asList::perf:0","12.74","0.5","2"
+"ds.asNumeric::perf:0","5.637","0.5","2"
+"ds.assign::perf::0","10.46","0.5","2"
+"ds.class::perf::combine:0","12.69","0.5","2"
+"ds.colnames::perf:0","9.518","0.5","2"
+"ds.exists::perf::combine:0","25.33","0.5","2"
+"ds.length::perf::combine:0","25.45","0.5","2"
+"ds.mean::perf::combine:0","25.37","0.5","2"
+"ds.mean::perf::split:0","25.74","0.5","2"
+"void::perf::void::0","56310.0","0.5","2"
diff --git a/tests/testthat/perf_files/default_perf_profile.csv b/tests/testthat/perf_files/default_perf_profile.csv
index 9a649b88..9f1ae6e5 100644
--- a/tests/testthat/perf_files/default_perf_profile.csv
+++ b/tests/testthat/perf_files/default_perf_profile.csv
@@ -6,7 +6,7 @@
"ds.asNumeric::perf:0","2.185","0.5","2"
"ds.assign::perf::0","5.490","0.5","2"
"ds.class::perf::combine:0","4.760","0.5","2"
-"ds.colnames::perf:0","9.942","0.5","2"
+"ds.colnames::perf:0","4.218","0.5","2"
"ds.exists::perf::combine:0","11.09","0.5","2"
"ds.length::perf::combine:0","9.479","0.5","2"
"ds.mean::perf::combine:0","9.650","0.5","2"
diff --git a/tests/testthat/perf_files/opal_azure-pipeline.csv b/tests/testthat/perf_files/opal_azure-pipeline.csv
index d75711a3..9f1ae6e5 100644
--- a/tests/testthat/perf_files/opal_azure-pipeline.csv
+++ b/tests/testthat/perf_files/opal_azure-pipeline.csv
@@ -6,7 +6,7 @@
"ds.asNumeric::perf:0","2.185","0.5","2"
"ds.assign::perf::0","5.490","0.5","2"
"ds.class::perf::combine:0","4.760","0.5","2"
-"ds.colnames::perf:0","4.159","0.5","2"
+"ds.colnames::perf:0","4.218","0.5","2"
"ds.exists::perf::combine:0","11.09","0.5","2"
"ds.length::perf::combine:0","9.479","0.5","2"
"ds.mean::perf::combine:0","9.650","0.5","2"
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
index 512e649c..5980e9df 100644
--- a/tests/testthat/setup.R
+++ b/tests/testthat/setup.R
@@ -33,5 +33,6 @@ source("connection_to_datasets/init_testing_datasets.R")
source("connection_to_datasets/init_studies_datasets.R")
source("connection_to_datasets/init_discordant_datasets.R")
source("connection_to_datasets/init_mediation_datasets.R")
+options(datashield.errors.print = FALSE)
# context("setup - done")
diff --git a/tests/testthat/test-arg-ds.ranksSecure.R b/tests/testthat/test-arg-ds.ranksSecure.R
deleted file mode 100644
index fc6b99e1..00000000
--- a/tests/testthat/test-arg-ds.ranksSecure.R
+++ /dev/null
@@ -1,102 +0,0 @@
-#-------------------------------------------------------------------------------
-# Copyright (c) 2019-2022 University of Newcastle upon Tyne. All rights reserved.
-# Copyright (c) 2022-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved.
-#
-# This program and the accompanying materials
-# are made available under the terms of the GNU Public License v3.0.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-#-------------------------------------------------------------------------------
-
-#
-# Set up
-#
-
-# context("ds.ranksSecure::arg::setup")
-
-connect.all.datasets()
-
-test_that("setup", {
- ds_expect_variables(c("D"))
-})
-
-#
-# Tests
-#
-
-# context("ds.ranksSecure::arg::missing variable")
-test_that("missing variable", {
- expect_error(ds.ranksSecure("LAB_MISSING"), "There are some DataSHIELD errors, list them with datashield.errors()", fixed = TRUE)
-
- res.errors <- DSI::datashield.errors()
-
- expect_length(res.errors, 3)
- expect_match(res.errors$study1, "* object 'LAB_MISSING' not found")
- expect_match(res.errors$study2, "* object 'LAB_MISSING' not found")
- expect_match(res.errors$study3, "* object 'LAB_MISSING' not found")
-})
-
-# context("ds.ranksSecure::arg::NULL variable")
-test_that("NULL variable", {
- DSI::datashield.assign.expr(conns = ds.test_env$connections, symbol = "LAB_NULL", expr = "NULL")
-
- res.class <- ds.class("LAB_NULL")
-
- expect_length(res.class, 3)
- expect_equal(res.class$study1, "NULL")
- expect_equal(res.class$study2, "NULL")
- expect_equal(res.class$study3, "NULL")
-
- expect_error(expect_warning(ds.ranksSecure("LAB_NULL"), "no non-missing arguments to max; returning -Inf", fixed = TRUE), "There are some DataSHIELD errors, list them with datashield.errors()", fixed = TRUE)
-
- res.errors <- DSI::datashield.errors()
-
- expect_length(res.errors, 3)
- expect_match(res.errors$study1, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases")
- expect_match(res.errors$study2, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases")
- expect_match(res.errors$study3, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases")
-})
-
-# context("ds.ranksSecure::arg::'text' variable")
-test_that("'text' variable", {
- expect_error(ds.ranksSecure("D$CHARACTER"), "There are some DataSHIELD errors, list them with datashield.errors()", fixed = TRUE)
-
- res.errors <- DSI::datashield.errors()
-
- expect_length(res.errors, 3)
- expect_match(res.errors$study1, "* Error in \\(1 - h\\) \\* qs\\[i\\] : non-numeric argument to binary operator")
- expect_match(res.errors$study2, "* Error in \\(1 - h\\) \\* qs\\[i\\] : non-numeric argument to binary operator")
- expect_match(res.errors$study3, "* Error in \\(1 - h\\) \\* qs\\[i\\] : non-numeric argument to binary operator")
-})
-
-# context("ds.ranksSecure::arg::'logical' variable")
-test_that("'logical' variable", {
- expect_error(ds.ranksSecure("D$LOGICAL"), "FAILED: one of the extreme quantile estimates is NA probably because of a cluster of values at one end of the range of possible values. Try setting a narrower range of quantile values via the argument", fixed = TRUE)
-})
-
-# context("ds.ranksSecure::arg::'integer factor' variable")
-test_that("'integer factor' variable", {
- expect_error(expect_warning(ds.ranksSecure("D$INTEGER_FACTOR"), "no non-missing arguments to max; returning -Inf", fixed = TRUE), "There are some DataSHIELD errors, list them with datashield.errors()", fixed = TRUE)
-
- res.errors <- DSI::datashield.errors()
-
- expect_length(res.errors, 3)
- expect_match(res.errors$study1, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases")
- expect_match(res.errors$study2, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases")
- expect_match(res.errors$study3, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases")
-})
-
-#
-# Done
-#
-
-# context("ds.ranksSecure::arg::shutdown")
-
-test_that("setup", {
- ds_expect_variables(c("D", "LAB_NULL", "input.mean.sd.df", "min.max.df", "summary.ranks.df", "testvar.ranks"))
-})
-
-disconnect.all.datasets()
-
-# context("ds.ranksSecure::arg::done")
diff --git a/tests/testthat/test-smk-ds.colnames.R b/tests/testthat/test-smk-ds.colnames.R
index ee98cc2e..b7d289ac 100644
--- a/tests/testthat/test-smk-ds.colnames.R
+++ b/tests/testthat/test-smk-ds.colnames.R
@@ -47,7 +47,7 @@ test_that("simple colnames", {
test_that("fails if the object does not exist", {
expect_error(
ds.colnames("non_existing_df"),
- regexp = "There are some DataSHIELD errors, list them with datashield.error()",
+ regexp = "The input object non_existing_df is not defined in sim1, sim2, sim3!",
ignore.case = TRUE
)
})
diff --git a/tests/testthat/test-smk-ds.listClientsideFunctions.R b/tests/testthat/test-smk-ds.listClientsideFunctions.R
index 4e5f6791..a4a541e0 100644
--- a/tests/testthat/test-smk-ds.listClientsideFunctions.R
+++ b/tests/testthat/test-smk-ds.listClientsideFunctions.R
@@ -106,7 +106,6 @@ test_that("check results", {
"ds.numNA",
"ds.qlspline",
"ds.quantileMean",
- "ds.ranksSecure",
"ds.rbind",
"ds.rBinom",
"ds.recodeLevels",
diff --git a/tests/testthat/test-smk-ds.ranksSecure.R b/tests/testthat/test-smk-ds.ranksSecure.R
deleted file mode 100644
index 43720c1f..00000000
--- a/tests/testthat/test-smk-ds.ranksSecure.R
+++ /dev/null
@@ -1,131 +0,0 @@
-#-------------------------------------------------------------------------------
-# Copyright (c) 2019-2022 University of Newcastle upon Tyne. All rights reserved.
-# Copyright (c) 2022-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved.
-#
-# This program and the accompanying materials
-# are made available under the terms of the GNU Public License v3.0.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-#-------------------------------------------------------------------------------
-
-#
-# Set up
-#
-
-# context("ds.ranksSecure::smk::setup")
-
-connect.studies.dataset.cnsim(list("LAB_TRIG", "LAB_TSC"))
-
-test_that("setup", {
- ds_expect_variables(c("D"))
-})
-
-#
-# Tests
-#
-
-# context("ds.ranksSecure::smk::continous, without NAs, all positive")
-test_that("continous, without NAs, all positive", {
- res.cc <- ds.completeCases("D$LAB_TSC", newobj="CC_LAB_TSC")
- expect_equal(res.cc$validity.check, " appears valid in all sources")
-
- res.num.na <- ds.numNA("CC_LAB_TSC")
- expect_length(res.num.na, 3)
- expect_equal(res.num.na$sim1, 0)
- expect_equal(res.num.na$sim2, 0)
- expect_equal(res.num.na$sim3, 0)
-
- res <- ds.ranksSecure("CC_LAB_TSC")
-
- expect_length(res, 2)
- expect_length(class(res), 1)
- expect_true(all(class(res) %in% c('data.frame')))
- expect_length(names(res), 2)
- expect_true(all(names(res) %in% c('evaluation.quantiles', 'final.quantile.vector')))
-
- expect_length(res$evaluation.quantiles, 15)
- expect_length(class(res$evaluation.quantiles), 1)
- expect_true(all(class(res$evaluation.quantiles) %in% c('numeric')))
- expect_length(res$final.quantile.vector, 15)
- expect_length(class(res$final.quantile.vector), 1)
- expect_true(all(class(res$final.quantile.vector) %in% c('numeric')))
-})
-
-# context("ds.ranksSecure::smk::continous, without NAs, with negative")
-test_that("continous, without NAs, with negative", {
- res.cc <- ds.completeCases("D$LAB_TRIG", newobj="CC_LAB_TRIG")
- expect_equal(res.cc$validity.check, " appears valid in all sources")
-
- res.num.na <- ds.numNA("CC_LAB_TRIG")
- expect_length(res.num.na, 3)
- expect_equal(res.num.na$sim1, 0)
- expect_equal(res.num.na$sim2, 0)
- expect_equal(res.num.na$sim3, 0)
-
- res <- ds.ranksSecure("CC_LAB_TRIG")
-
- expect_length(res, 2)
- expect_length(class(res), 1)
- expect_true(all(class(res) %in% c('data.frame')))
- expect_length(names(res), 2)
- expect_true(all(names(res) %in% c('evaluation.quantiles', 'final.quantile.vector')))
-
- expect_length(res$evaluation.quantiles, 15)
- expect_length(class(res$evaluation.quantiles), 1)
- expect_true(all(class(res$evaluation.quantiles) %in% c('numeric')))
- expect_length(res$final.quantile.vector, 15)
- expect_length(class(res$final.quantile.vector), 1)
- expect_true(all(class(res$final.quantile.vector) %in% c('numeric')))
-})
-
-# context("ds.ranksSecure::smk::continous, with NAs, all positive")
-test_that("continous, with NAs, all positive", {
- res <- ds.ranksSecure("D$LAB_TSC")
-
- expect_length(res, 2)
- expect_length(class(res), 1)
- expect_true(all(class(res) %in% c('data.frame')))
- expect_length(names(res), 2)
- expect_true(all(names(res) %in% c('evaluation.quantiles', 'final.quantile.vector')))
-
- expect_length(res$evaluation.quantiles, 15)
- expect_length(class(res$evaluation.quantiles), 1)
- expect_true(all(class(res$evaluation.quantiles) %in% c('numeric')))
- expect_length(res$final.quantile.vector, 15)
- expect_length(class(res$final.quantile.vector), 1)
- expect_true(all(class(res$final.quantile.vector) %in% c('numeric')))
-})
-
-# context("ds.ranksSecure::smk::continous, with NAs, with negative")
-test_that("continous, with NAs, with negative", {
- res <- ds.ranksSecure("D$LAB_TRIG")
-
- expect_length(res, 2)
- expect_length(class(res), 1)
- expect_true(all(class(res) %in% c('data.frame')))
- expect_length(names(res), 2)
- expect_true(all(names(res) %in% c('evaluation.quantiles', 'final.quantile.vector')))
-
- expect_length(res$evaluation.quantiles, 15)
- expect_length(class(res$evaluation.quantiles), 1)
- expect_true(all(class(res$evaluation.quantiles) %in% c('numeric')))
- expect_length(res$final.quantile.vector, 15)
- expect_length(class(res$final.quantile.vector), 1)
- expect_true(all(class(res$final.quantile.vector) %in% c('numeric')))
-})
-
-
-#
-# Done
-#
-
-# context("ds.ranksSecure::smk::shutdown")
-
-test_that("setup", {
- ds_expect_variables(c("D", "CC_LAB_TSC", "CC_LAB_TRIG", "final.quantile.df", "summary.ranks.df", "testvar.ranks"))
-})
-
-disconnect.studies.dataset.cnsim()
-
-# context("ds.ranksSecure::smk::done")
diff --git a/tests/testthat/test-smk-isDefined.R b/tests/testthat/test-smk-isDefined.R
index ab3f43b0..606fbdfe 100644
--- a/tests/testthat/test-smk-isDefined.R
+++ b/tests/testthat/test-smk-isDefined.R
@@ -14,7 +14,6 @@
#
# context("isDefined::smk::setup")
-
connect.discordant.dataset.simple(list("A", "B", "C"))
test_that("setup", {
@@ -30,7 +29,7 @@ test_that("setup", {
# context("isDefined::smk::default")
test_that("default test, dataframe D", {
res <- isDefined(ds.test_env$connections, "D")
-
+
expect_length(class(res), 1)
expect_true(all("list" %in% class(res)))
expect_length(res, 3)
@@ -45,7 +44,7 @@ test_that("default test, dataframe E", {
test_that("default test, dataframe column E$A", {
# expect_error(isDefined(ds.test_env$connections, "E$A"), "The input object E$A is not defined in discordant1, discordant2, discordant3!", fixed=TRUE)
- expect_error(isDefined(ds.test_env$connections, "E$A"), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE)
+ expect_error(isDefined(ds.test_env$connections, "E$A"), "There are some DataSHIELD errors", fixed=TRUE)
err <- DSI::datashield.errors();
@@ -94,7 +93,7 @@ test_that("default test, dataframe columns D$A,D$B", {
# context("isDefined::smk::error.message=FALSE")
test_that("error.message=FALSE test, dataframe D", {
res <- isDefined(ds.test_env$connections, "D", error.message = FALSE)
-
+
expect_length(class(res), 1)
expect_true(all("list" %in% class(res)))
expect_length(res, 3)
@@ -116,7 +115,7 @@ test_that("error.message=FALSE test, dataframe E", {
test_that("error.message=FALSE test, dataframe column E$A", {
# expect_error(isDefined(ds.test_env$connections, "E$A", error.message = FALSE), "The input object E$A is not defined in discordant1, discordant2, discordant3!", fixed=TRUE)
- expect_error(isDefined(ds.test_env$connections, "E$A", error.message = FALSE), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE)
+ expect_error(isDefined(ds.test_env$connections, "E$A", error.message = FALSE), "There are some DataSHIELD errors", fixed=TRUE)
err <- DSI::datashield.errors();
@@ -200,7 +199,7 @@ test_that("error.message=FALSE test, dataframe columns D$A,D$B", {
# context("isDefined::smk::error.message=TRUE")
test_that("error.message=TRUE test, dataframe D", {
res <- isDefined(ds.test_env$connections, "D", error.message = TRUE)
-
+
expect_length(class(res), 1)
expect_true(all("list" %in% class(res)))
expect_length(res, 3)
@@ -215,7 +214,7 @@ test_that("error.message=TRUE test, dataframe E", {
test_that("error.message=TRUE test, dataframe column E$A", {
# expect_error(isDefined(ds.test_env$connections, "E$A", error.message = TRUE), "The input object E$A is not defined in discordant1, discordant2, discordant3!", fixed=TRUE)
- expect_error(isDefined(ds.test_env$connections, "E$A", error.message = TRUE), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE)
+ expect_error(isDefined(ds.test_env$connections, "E$A", error.message = TRUE), "There are some DataSHIELD errors", fixed=TRUE)
err <- DSI::datashield.errors();
diff --git a/tests/testthat/test-smk_expt-ds.ranksSecure.R b/tests/testthat/test-smk_expt-ds.ranksSecure.R
index edb86cd2..4da33430 100644
--- a/tests/testthat/test-smk_expt-ds.ranksSecure.R
+++ b/tests/testthat/test-smk_expt-ds.ranksSecure.R
@@ -27,48 +27,48 @@ test_that("setup", {
# context("ds.ranksSecure::smk_expt::continous, without NAs, all positive")
test_that("continous, without NAs, all positive", {
- res.cc <- ds.completeCases("D$LAB_TSC", newobj="CC_LAB_TSC")
- expect_equal(res.cc$validity.check, " appears valid in all sources")
-
- res.num.na <- ds.numNA("CC_LAB_TSC")
- expect_length(res.num.na, 3)
- expect_equal(res.num.na$sim1, 0)
- expect_equal(res.num.na$sim2, 0)
- expect_equal(res.num.na$sim3, 0)
-
- res <- ds.ranksSecure("CC_LAB_TSC")
-
- expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont.rds')
+# res.cc <- ds.completeCases("D$LAB_TSC", newobj="CC_LAB_TSC")
+# expect_equal(res.cc$validity.check, " appears valid in all sources")
+#
+# res.num.na <- ds.numNA("CC_LAB_TSC")
+# expect_length(res.num.na, 3)
+# expect_equal(res.num.na$sim1, 0)
+# expect_equal(res.num.na$sim2, 0)
+# expect_equal(res.num.na$sim3, 0)
+#
+# res <- ds.ranksSecure("CC_LAB_TSC")
+#
+# expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont.rds')
})
# context("ds.ranksSecure::smk_expt::continous, without NAs, with negative")
test_that("continous, without NAs, with negative", {
- res.cc <- ds.completeCases("D$LAB_TRIG", newobj="CC_LAB_TRIG")
- expect_equal(res.cc$validity.check, " appears valid in all sources")
-
- res.num.na <- ds.numNA("CC_LAB_TRIG")
- expect_length(res.num.na, 3)
- expect_equal(res.num.na$sim1, 0)
- expect_equal(res.num.na$sim2, 0)
- expect_equal(res.num.na$sim3, 0)
-
- res <- ds.ranksSecure("CC_LAB_TRIG")
-
- expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_neg.rds')
+# res.cc <- ds.completeCases("D$LAB_TRIG", newobj="CC_LAB_TRIG")
+# expect_equal(res.cc$validity.check, " appears valid in all sources")
+#
+# res.num.na <- ds.numNA("CC_LAB_TRIG")
+# expect_length(res.num.na, 3)
+# expect_equal(res.num.na$sim1, 0)
+# expect_equal(res.num.na$sim2, 0)
+# expect_equal(res.num.na$sim3, 0)
+#
+# res <- ds.ranksSecure("CC_LAB_TRIG")
+#
+# expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_neg.rds')
})
# context("ds.ranksSecure::smk_expt::continous, with NAs, all positive")
test_that("continous, with NAs, all positive", {
- res <- ds.ranksSecure("D$LAB_TSC")
-
- expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_nas.rds')
+# res <- ds.ranksSecure("D$LAB_TSC")
+#
+# expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_nas.rds')
})
# context("ds.ranksSecure::smk_expt::continous, with NAs, with negative")
test_that("continous, with NAs, with negative", {
- res <- ds.ranksSecure("D$LAB_TRIG")
-
- expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_nas_neg.rds')
+# res <- ds.ranksSecure("D$LAB_TRIG")
+#
+# expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_nas_neg.rds')
})
#
@@ -78,7 +78,7 @@ test_that("continous, with NAs, with negative", {
# context("ds.ranksSecure::smk_expt::shutdown")
test_that("setup", {
- ds_expect_variables(c("D", "CC_LAB_TSC", "CC_LAB_TRIG", "final.quantile.df", "summary.ranks.df", "testvar.ranks"))
+# ds_expect_variables(c("D", "CC_LAB_TSC", "CC_LAB_TRIG", "final.quantile.df", "summary.ranks.df", "testvar.ranks"))
})
disconnect.studies.dataset.cnsim()