diff --git a/.Rbuildignore b/.Rbuildignore index b66ab898..afbc8f5c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -17,8 +17,8 @@ ^R/secure.global.ranking.md$ ^_pkgdown\.yml$ ^docs$ -^dsBase_6.3.5.tar.gz$ -^dsBase_6.3.5-permissive.tar.gz$ +^dsBase_7.0-dev-feat_performance\.tar\.gz$ +^dsBase_7.0-dev-feat_performance-permissive\.tar\.gz$ ^dsDanger_6.3.4.tar.gz$ ^\.circleci$ ^\.circleci/config\.yml$ diff --git a/NAMESPACE b/NAMESPACE index a41b8f0a..8bdab82e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -89,7 +89,6 @@ export(ds.rBinom) export(ds.rNorm) export(ds.rPois) export(ds.rUnif) -export(ds.ranksSecure) export(ds.rbind) export(ds.reShape) export(ds.recodeLevels) diff --git a/R/ds.colnames.R b/R/ds.colnames.R index a9e80252..a4b98b1a 100644 --- a/R/ds.colnames.R +++ b/R/ds.colnames.R @@ -1,51 +1,51 @@ #' #' @title Produces column names of the R object in the server-side -#' @description Retrieves column names of an R object on the server-side. +#' @description Retrieves column names of an R object on the server-side. #' This function is similar to R function \code{colnames}. -#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}. -#' +#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}. +#' #' Server function called: \code{colnamesDS} #' @param x a character string providing the name of the input data frame or matrix. -#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. #' If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. -#' @return \code{ds.colnames} returns the column names of -#' the specified server-side data frame or matrix. 
+#' @return \code{ds.colnames} returns the column names of +#' the specified server-side data frame or matrix. #' @author DataSHIELD Development Team #' @seealso \code{\link{ds.dim}} to obtain the dimensions of a matrix or a data frame. -#' @examples +#' @examples #' \dontrun{ -#' +#' #' ## Version 6, for version 5 see the Wiki #' # Connecting to the Opal servers -#' +#' #' require('DSI') #' require('DSOpal') #' require('dsBaseClient') -#' +#' #' builder <- DSI::newDSLoginBuilder() -#' builder$append(server = "study1", -#' url = "http://192.168.56.100:8080/", -#' user = "administrator", password = "datashield_test&", +#' builder$append(server = "study1", +#' url = "http://192.168.56.100:8080/", +#' user = "administrator", password = "datashield_test&", #' table = "CNSIM.CNSIM1", driver = "OpalDriver") -#' builder$append(server = "study2", -#' url = "http://192.168.56.100:8080/", -#' user = "administrator", password = "datashield_test&", +#' builder$append(server = "study2", +#' url = "http://192.168.56.100:8080/", +#' user = "administrator", password = "datashield_test&", #' table = "CNSIM.CNSIM2", driver = "OpalDriver") #' builder$append(server = "study3", -#' url = "http://192.168.56.100:8080/", -#' user = "administrator", password = "datashield_test&", +#' url = "http://192.168.56.100:8080/", +#' user = "administrator", password = "datashield_test&", #' table = "CNSIM.CNSIM3", driver = "OpalDriver") #' logindata <- builder$build() -#' +#' #' # Log onto the remote Opal training servers -#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") -#' +#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") +#' #' # Getting column names of the R objects stored in the server-side #' ds.colnames(x = "D", #' datasources = connections[1]) #only the first server ("study1") is used #' # Clear the Datashield R sessions and logout -#' datashield.logout(connections) +#' datashield.logout(connections) #' } #' 
@export #' @@ -65,6 +65,17 @@ ds.colnames <- function(x=NULL, datasources=NULL) { stop("Please provide the name of a data.frame or matrix!", call.=FALSE) } + # check if the input object(s) is(are) defined in all the studies + defined <- isDefined(datasources, x) + + # call the internal function that checks the input object is of the same class in all studies. + typ <- checkClass(datasources, x) + + # if the input object is not a matrix or a dataframe stop + if(!('data.frame' %in% typ) & !('matrix' %in% typ)){ + stop("The input vector must be of type 'data.frame' or a 'matrix'!", call.=FALSE) + } + cally <- call("colnamesDS", x) column_names <- DSI::datashield.aggregate(datasources, cally) diff --git a/R/ds.ranksSecure.R b/R/ds.ranksSecure.R deleted file mode 100644 index 8ffa6a97..00000000 --- a/R/ds.ranksSecure.R +++ /dev/null @@ -1,585 +0,0 @@ -# ds.ranksSecure -#' @title Secure ranking of a vector across all sources -#' @description Securely generate the ranks of a numeric vector and estimate -#' true global quantiles across all data sources simultaneously -#' @details ds.ranksSecure is a clientside function which calls a series of -#' other clientside and serverside functions to securely generate the global -#' ranks of a numeric vector "V2BR" (vector to be ranked) -#' in order to set up analyses on V2BR based on -#' non-parametric methods, some types of survival analysis and to derive true -#' global quantiles (such as the median, lower (25%) and upper (75%) quartiles, -#' and the 95% and 97.5% quantiles) across all sources simultaneously. These -#' global quantiles are, in general, different to the mean or median of the -#' equivalent quantiles calculated independently in each data source separately. -#' For more details about the cluster of functions that collectively -#' enable secure global ranking and estimation of global quantiles see the -#' associated document entitled "secure.global.ranking.docx". 
-#' @param input.var.name a character string in a format that can pass through -#' the DataSHIELD R parser which specifies the name of the vector to be ranked. -#' Needs to have same name in each data source. -#' @param quantiles.for.estimation one of a restricted set of character strings. -#' To mitigate disclosure risk only the following set of quantiles can be -#' generated: c(0.025,0.05,0.10,0.20,0.25,0.30,0.3333,0.40,0.50,0.60,0.6667, -#' 0.70,0.75,0.80,0.90,0.95,0.975). The allowable formats for the argument -#' are of the general form: "0.025-0.975" where the first number is the lowest -#' quantile to be estimated and the second number is the equivalent highest -#' quantile to estimate. These two quantiles are then estimated along with -#' all allowable quantiles in between. The allowable argument values are then: -#' "0.025-0.975", "0.05-0.95", "0.10-0.90", "0.20-0.80". Two alternative values -#' are "quartiles" i.e. c(0.25,0.50,0.75), and "median" i.e. c(0.50). The -#' default value is "0.05-0.95". If the sample size is so small that an extreme -#' quartile could be disclosive the function will be terminated and an error -#' message returned telling you that you might try using an argument with a -#' narrower set of quantiles. This disclosure trap will be triggered if the -#' total number of subjects across all studies divided by the total number -#' of quantile values being estimated is less than or equal to nfilter.tab -#' (the minimum cell size in a contingency table). -#' @param generate.quantiles a logical value indicating whether the -#' ds.ranksSecure function should carry on to estimate the key quantile -#' values specified by argument or should stop -#' once the global ranks have been created and written to the serverside. -#' Default is TRUE and as the key quantiles are generally non-disclosive this -#' is usually the setting to use. 
But, if there is some abnormal configuration -#' of the clusters of values that are being ranked such that some values are -#' treated as being missing and the processing stops, then setting -#' generate.quantiles to FALSE allows the generation of ranks to complete so -#' they can then be used for non-parametric analysis, even if the key values -#' cannot be estimated. A real example of an unusual configuration was in a -#' reasonably large dataset of survival times, where a substantial proportion -#' of survival profiles were censored at precisely 10 years. This meant that -#' the 97.5% percentile could not be separated from the 95% percentile and so -#' the former was allocated the value NA. This stopped processing of the ranks -#' which could then be enabled by setting generate.quantiles to FALSE. However, -#' if this problem is detected an error message is returned which indicates that -#' in some cases (as in this case in fact) the problem can be circumvented -#' by selecting a narrow range of key quantiles to estimate. In this case, in -#' fact, this simply required changing the argument -#' from "0.025-0.975" to "0.05-0.95". -#' @param output.ranks.df a character string in a format that can pass through -#' the DataSHIELD R parser which specifies an optional name for the -#' data.frame written to the serverside on each data source that contains -#' 11 of the key output variables from the ranking procedure pertaining to that -#' particular data source. This includes the global ranks and quantiles of each -#' value of the V2BR (i.e. the values are ranked across all studies -#' simultaneously). If no name is specified, the default name -#' is allocated as "full.ranks.df". This data.frame contains disclosive -#' information and cannot therefore be passed to the clientside. 
-#' @param summary.output.ranks.df a character string in a format that can pass through -#' the DataSHIELD R parser which specifies an optional name for the summary -#' data.frame written to the serverside on each data source that contains -#' 5 of the key output variables from the ranking procedure pertaining to that -#' particular data source. This again includes the global ranks and quantiles of each -#' value of the V2BR (i.e. the values are ranked across all studies -#' simultaneously). If no name is specified, the default name -#' is allocated as "summary.ranks.df" This data.frame contains disclosive -#' information and cannot therefore be passed to the clientside. -#' @param ranks.sort.by a character string taking two possible values. These -#' are "ID.orig" and "vals.orig". These define the order in which the -#' output.ranks.df and summary.output.ranks.df data frames are presented. If -#' the argument is set as "ID.orig" the order of rows in the output data frames -#' are precisely the same as the order of original input vector that is being -#' ranked (i.e. V2BR). This means the ranks can simply be cbinded to the -#' matrix, data frame or tibble that originally included V2BR so it also -#' includes the corresponding ranks. If it is set as "vals.orig" the output -#' data frames are in order of increasing magnitude of the original values of -#' V2BR. Default value is "ID.orig". -#' @param shared.seed.value an integer value which is used to set the -#' random seed generator in each study. Initially, the seed is set to be the -#' same in all studies, so the order and parameters of the repeated -#' encryption procedures are precisely the same in each study. Then a -#' study-specific modification of the seed in each study ensures that the -#' procedures initially generating the masking pseudodata (which are then -#' subject to the same encryption procedures as the real data) are different -#' in each study. 
For further information about the shared seed and how we -#' intend to transmit it in the future, please see the detailed associated -#' header document. -#' @param synth.real.ratio an integer value specifying the ratio between the -#' number of masking pseudodata values generated in each study compared to -#' the number of real data values in V2BR. -#' @param NA.manage character string taking three possible values: "NA.delete", -#' "NA.low","NA.hi". This argument determines how missing values are managed -#' before ranking. "NA.delete" results in all missing values being removed -#' prior to ranking. This means that the vector of ranks in each study is -#' shorter than the original vector of V2BR values by an amount corresponding -#' to the number of missing values in V2BR in that study. Any rows containing -#' missing values in V2BR are simply removed before the ranking procedure is -#' initiated so the order of rows without missing data is unaltered. "NA.low" -#' indicates that all missing values should be converted to a new value that -#' has a meaningful magnitude that is lower (more negative or less positive) -#' than the lowest non-missing value of V2BR in any of the studies. This means, -#' for example, that if there are a total of M values of V2BR that are missing -#' across all studies, there will be a total of M observations that are ranked -#' lowest each with a rank of (M+1)/2. So if 7 are missing the lowest 7 ranks -#' will be 4,4,4,4,4,4,4 and if 4 are missing the first 4 ranks will be -#' 2.5,2.5,2.5,2.5. "NA.hi" indicates that all missing values should be -#' converted to a new value that has a meaningful magnitude that is higher(less -#' negative or more positive)than the highest non-missing value of V2BR in any -#' of the studies. 
This means, for example, that if there are a total of M -#' values of V2BR that are missing across all studies and N non-missing -#' values, there will be a total of M observations that are ranked -#' highest each with a rank of (2N-M+1)/2. So if there are a total of 1000 -#' V2BR values and 9 are missing the highest 9 ranks will be 996, 996 ... 996. -#' If NA.manage is either "NA.low" or "NA.hi" the final rank vector in each -#' study will have the same length as the V2BR vector in that same study. -#' 2.5,2.5,2.5,2.5. The default value of the "NA.manage" argument is "NA.delete" -#' @param rm.residual.objects logical value. Default = TRUE: at the beginning -#' and end of each run of ds.ranksSecure delete all extraneous objects that are -#' otherwise left behind. These are not usually needed, but could be of value -#' if one were investigating a problem with the ranking. FALSE: do not delete -#' the residual objects -#' @param monitor.progress logical value. Default = FALSE. If TRUE, function -#' outputs information about its progress. -#' @param datasources specifies the particular opal object(s) to use. If the -#' argument is not specified (NULL) the default set of opals -#' will be used. If is specified, it should be set without -#' inverted commas: e.g. datasources=opals.em. If you wish to -#' apply the function solely to e.g. the second opal server in a set of three, -#' the argument can be specified as: e.g. datasources=opals.em[2]. -#' If you wish to specify the first and third opal servers in a set you specify: -#' e.g. datasources=opals.em[c(1,3)]. -#' @return the data frame objects specified by the arguments output.ranks.df -#' and summary.output.ranks.df. These are written to the serverside in each -#' study. 
Provided the sort order is consistent these data frames can be cbinded -#' to any other data frame, matrix or tibble object containing V2BR or to the -#' V2BR vector itself, allowing the global ranks and quantiles to be -#' analysed rather than the actual values of V2BR. The last call within -#' the ds.ranksSecure function is to another clientside function -#' ds.extractQuantile (for further details see header for that function). -#' This returns an additional data frame "final.quantile.df" of which the first -#' column is the vector of key quantiles to be estimated as specified by the -#' argument and the second column is the list of -#' precise values of V2BR which correspond to these key quantiles. Because -#' the serverside functions associated with ds.ranksSecure and -#' ds.extractQuantile block potentially disclosive output (see information -#' for parameter quantiles.for.estimation) the "final.quantile.df" is returned -#' to the client allowing the direct reporting of V2BR values corresponding to -#' key quantiles such as the quartiles, the median and 95th percentile etc. In -#' addition a copy of the same data frame is also written to the serverside in -#' each study allowing the value of key quantiles such as the median to be -#' incorporated directly in calculations or transformations on the serverside -#' regardless in which study (or studies) those key quantile values have -#' occurred. 
-#' @author Paul Burton 4th November, 2021 -#' @export -ds.ranksSecure <- function(input.var.name=NULL, quantiles.for.estimation="0.05-0.95", - generate.quantiles=TRUE, - output.ranks.df=NULL, summary.output.ranks.df = NULL, - ranks.sort.by="ID.orig", shared.seed.value=10, - synth.real.ratio=2,NA.manage="NA.delete", - rm.residual.objects=TRUE, monitor.progress=FALSE, - datasources=NULL){ - - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - datasources.in.current.function<-datasources - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } - - # check if user has provided the name of the column that holds the input variable - if(is.null(input.var.name)){ - stop("Please provide the name of the variable to be ranked across all sources collectively e.g. 'varname'", call.=FALSE) - } - - # check if user has provided the name of the input variable in a correct character format - if(!is.character(input.var.name)){ - stop("Please provide the name of the variable that is to be converted to a factor in character format e.g. 'varname'", call.=FALSE) - } - - # look for output df names and provide defaults if required - if(is.null(output.ranks.df)){ - output.ranks.df<-"full.ranks.df" - } - - if(is.null(summary.output.ranks.df)){ - summary.output.ranks.df<-"summary.ranks.df" - } - - if(is.null(synth.real.ratio)){ - synth.real.ratio<-10 - } - -#CLEAN UP RESIDUAL OBJECTS FROM PREVIOUS RUNS OF THE FUNCTION - if(rm.residual.objects) - { - #UNLESS THE IS FALSE, - #CLEAR UP ANY UNWANTED RESIDUAL OBJECTS FROM THE - #PREVIOUS RUNNING OF THE ds.ranksSecure FUNCTION IN THE - #CASE THAT PREVIOUS CALL STOPPED PREMATURELY AND SO THE - #FINAL CLEARING UP STEP WAS NOT INITIATED. 
- - rm.names<-c("blackbox.output.df", "blackbox.ranks.df", - "global.bounds.df", "global.ranks.quantiles.df", - "input.mean.sd.df", "input.ranks.sd.df", - output.ranks.df, "min.max.df", "numstudies.df", - "sR4.df", "sR5.df") - - #make transmittable via parser - rm.names.transmit <- paste(rm.names,collapse=",") - - calltext.rm <- call("rmDS", rm.names.transmit) - - rm.output <- DSI::datashield.aggregate(datasources, calltext.rm) - } - - if(monitor.progress){ -message("\n\nStep 1 of 8 complete: - Cleaned up residual output from - previous runs of ds.ranksSecure - - - ") - - - } - - #CALL AN INITIALISING SERVER SIDE FUNCTION (ASSIGN) - #TO IDENTIFY QUANTILES OF ORIGINAL VARIABLES IN EACH STUDY - #TO CREATE A STARTING CONFIGURATION THAT IS ALMOST CERTAINLY >1 WITH A SPAN OF 10 - - cally0 <- paste0('quantileMeanDS(', input.var.name, ')') - initialise.input.var <- DSI::datashield.aggregate(datasources, as.symbol(cally0)) - - - numstudies<-length(initialise.input.var) - numvals<-length(initialise.input.var[[1]]) - - q5.val<-NULL - q95.val<-NULL - mean.val<-NULL - - for(rr in 1:numstudies){ - q5.val<-c(q5.val,initialise.input.var[[rr]][1]) - q95.val<-c(q95.val,initialise.input.var[[rr]][numvals-1]) - mean.val<-c(mean.val,initialise.input.var[[rr]][numvals]) - } - - min.q5<-min(q5.val) - max.q95<-max(q95.val) - - max.sd.input.var<-(max.q95-min.q5)/(2*1.65) - mean.input.var<-mean(mean.val) - - input.mean.sd.df<-data.frame(cbind(mean.input.var,max.sd.input.var)) - - - #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN VALUES TO SERVERSIDE - dsBaseClient::ds.dmtC2S(dfdata=input.mean.sd.df,newobj="input.mean.sd.df") - -if(monitor.progress){ -message("\n\nStep 2 of 8 complete: - Estimated mean and sd of - v2br to standardise initial values - - - ") - } - -#CALL minMaxRandDS FUNCTION (AGGREGATE) TO CREATE MIN AND MAX VALUES -#FOR INPUT VARIABLE WITH RANDOM NOISE ON TOP. 
ACTUAL VALUE DOESN'T -#MATTER AS IT IS ONLY TO ALLOCATE LOW AND HIGH VALUES TO NA WHEN -#THEY ARE TO BE INCLUDED IN THE RANKING - - calltext0 <- call("minMaxRandDS",input.var.name) - rand.min.max<-DSI::datashield.aggregate(datasources, calltext0) - - - numstudies<-length(rand.min.max) - - rand.min.min<-NULL - rand.max.max<-NULL - - for(ss in 1:numstudies){ - rand.min.min<-c(rand.min.min,rand.min.max[[ss]][1]) - rand.max.max<-c(rand.max.max,rand.min.max[[ss]][2]) - } - - min.min.final<-min(rand.min.min) - max.max.final<-min(rand.max.max) - - min.max.df<-data.frame(cbind(min.min.final,max.max.final)) - -#CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN VALUES TO SERVERSIDE -dsBaseClient::ds.dmtC2S(dfdata=min.max.df,newobj="min.max.df") - -if(monitor.progress){ -message("\n\nStep 3 of 8 complete: - Generated ultra max and ultra min values to allocate to - missing values if is NA.hi or NA.low - - - ") -} - - #CALL THE FIRST SERVER SIDE FUNCTION (ASSIGN) - #WRITES ENCRYPTED DATA TO SERVERSIDE OBJECT "blackbox.output.df" - calltext1 <- call("blackBoxDS", input.var.name=input.var.name, - #max.sd.input.var=input.mean.sd.df$max.sd.input.var, - #mean.input.var=input.mean.sd.df$mean.input.var, - shared.seedval=shared.seed.value,synth.real.ratio,NA.manage) - DSI::datashield.assign(datasources, "blackbox.output.df", calltext1) - -if(monitor.progress){ -message("\n\nStep 4 of 8 complete: - Pseudo data synthesised,first set of rank-consistent - transformations complete and blackbox.output.df created - - - ") - } - - #CALL THE SECOND SERVER SIDE FUNCTION (AGGREGATE) - #RETURN ENCRYPTED DATA IN "blackbox.output.df" TO CLIENTSIDE - calltext2 <- call("ranksSecureDS1") - blackbox.output<-DSI::datashield.aggregate(datasources, calltext2) - - numstudies<-length(blackbox.output) - - studyid<-rep(1,nrow(blackbox.output[[1]])) - - sR3.df<-data.frame(cbind(blackbox.output[[1]],studyid)) - - - if(numstudies>=1) - { - for(ss in 2:numstudies) - { - studyid<-rep(ss,nrow(blackbox.output[[ss]])) - - 
temp.df<-data.frame(cbind(blackbox.output[[ss]],studyid)) - sR3.df<-rbind(sR3.df,temp.df) - } - } - colnames(sR3.df)<-c(colnames(blackbox.output[[1]]),"studyid") - - ord.global.val<-order(sR3.df$encrypted.var) - sR3.df<-sR3.df[ord.global.val,] - global.rank<-rank(sR3.df$encrypted.var) - sR3.sort.global.val.df<-data.frame(cbind(sR3.df,global.rank)) - - - #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN df TO SERVERSIDE - for(ss in 1:3) - { - sR4.df<-sR3.sort.global.val.df[sR3.sort.global.val.df$studyid==ss,] - dsBaseClient::ds.dmtC2S(dfdata=sR4.df,newobj="sR4.df", - datasources = datasources.in.current.function[ss]) - } - - numstudies.df<-data.frame(numstudies) - - #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN numstudies TO SERVERSIDE - dsBaseClient::ds.dmtC2S(dfdata=numstudies.df,newobj="numstudies.df", - datasources = datasources.in.current.function) - - - #CALL THE THIRD SERVER SIDE FUNCTION (ASSIGN) - #SELECTS ENCRYPTED DATA FOR REAL SUBJECTS IN EACH - #STUDY SPECIFIC sR4.df AND WRITES AS sR5.df ON SERVERSIDE - calltext3 <- call("ranksSecureDS2") - DSI::datashield.assign(datasources,"sR5.df",calltext3) - - ds.make("sR5.df$global.rank","testvar.ranks") - -if(monitor.progress){ - message("\n\nStep 5 of 8 complete: - Global ranks generated and pseudodata stripped out. 
Now ready - to proceed to transformation of global ranks - - - ") - } - - input.ranks.name<-"testvar.ranks" - - cally2 <- paste0('quantileMeanDS(', input.ranks.name, ')') - initialise.input.ranks <- DSI::datashield.aggregate(datasources, as.symbol(cally2)) - - - numstudies<-length(initialise.input.ranks) - numvals<-length(initialise.input.ranks[[1]]) - - q5.val<-NULL - q95.val<-NULL - mean.ranks<-NULL - - for(rr in 1:numstudies){ - q5.val<-c(q5.val,initialise.input.ranks[[rr]][1]) - q95.val<-c(q95.val,initialise.input.ranks[[rr]][numvals-1]) - mean.ranks<-c(mean.ranks,initialise.input.ranks[[rr]][numvals]) - } - - min.q5<-min(q5.val) - max.q95<-max(q95.val) - - max.sd.input.ranks<-(max.q95-min.q5)/(2*1.65) - mean.input.ranks<-mean(mean.ranks) - - input.ranks.sd.df<-data.frame(cbind(mean.input.ranks,max.sd.input.ranks)) - - - #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN VALUES TO SERVERSIDE - dsBaseClient::ds.dmtC2S(dfdata=input.ranks.sd.df,newobj="input.ranks.sd.df") - - - - #CALLS FOURTH SERVER SIDE FUNCTION (ASSIGN) - #THAT IS A MODIFIED VERSION OF blackBoxDS THAT - #ENCRYPTS JUST THE RANKS OF THE REAL DATA AND WRITES - #TO blackbox.ranks.df ON THE SERVERSIDE - #THIS VERSION (blackBoxDS2) CREATES NO SYNTHETIC DATA TO - #CONCEAL VALUES - - - calltext4 <- call("blackBoxRanksDS","testvar.ranks", - shared.seedval=shared.seed.value) - - DSI::datashield.assign(datasources, "blackbox.ranks.df", calltext4) - -if(monitor.progress){ - message("\n\nStep 6 of 8 complete: - Rank-consistent transformations of global ranks complete - and blackbox.ranks.df created - - - ") - } - - - - #CALL THE FIFTH SERVER SIDE FUNCTION (AGGREGATE) - #SEND NON-DISCLOSIVE ELEMENTS OF (ENCRYPTED) DATA IN "blackbox.ranks.df" - #TO CLIENTSIDE - - calltext5 <- call("ranksSecureDS3") - blackbox.ranks.output<-DSI::datashield.aggregate(datasources, calltext5) - - numstudies<-length(blackbox.ranks.output) - - sR6.df<-blackbox.ranks.output[[1]] - - - if(numstudies>=1) - { - for(ss in 2:numstudies) - { - 
sR6.df<-rbind(sR6.df,blackbox.ranks.output[[ss]]) - } - } - sR6.df<-data.frame(sR6.df) - colnames(sR6.df)<-c(colnames(blackbox.ranks.output[[1]])) - - - #Rank encrypted ranks across all studies - real.ranks.global<-rank(sR6.df$encrypted.ranks) - real.quantiles.global<-real.ranks.global/length(real.ranks.global) - sR7.df<-cbind(sR6.df,real.ranks.global,real.quantiles.global) - ord.by.real.ranks.global<-order(sR7.df$real.ranks.global) - sR7.df.by.real.ranks.global<-sR7.df[ord.by.real.ranks.global,] - - - - #CALL CLIENTSIDE FUNCTION ds.dmtC2S TO RETURN sR7.df TO SERVERSIDE - for(ss in 1:3) - { - sR7.df.study.specific<-sR7.df.by.real.ranks.global[sR7.df.by.real.ranks.global$studyid==ss,] - dsBaseClient::ds.dmtC2S(dfdata=sR7.df.study.specific,newobj="global.ranks.quantiles.df", - datasources = datasources.in.current.function[ss]) - } - - - - - #CALL THE SIXTH SERVER SIDE FUNCTION (ASSIGN) - #TAKE ALLOCATED GLOBAL RANKS FROM sR7.df APPEND TO blackbox.ranks.df - #TO CREATE sR9.df - - calltext6 <- call("ranksSecureDS4",ranks.sort.by) - DSI::datashield.assign(datasources,output.ranks.df,calltext6) - - - calltext7 <- call("ranksSecureDS5", output.ranks.df) - DSI::datashield.assign(datasources,summary.output.ranks.df, calltext7) - - if(monitor.progress){ - message("\n\nStep 7 of 8 complete: - Final global ranking of values in v2br complete and - written to each serverside as appropriate - - - ",summary.output.ranks.df) - } - - - - #CLEAN UP UNWANTED RESIDUAL OBJECTS FROM THE RUNNING OF ds.ranksSecure - #EXCEPT FOR OBJECTS CREATED BY ds.extractQuantiles - - if(rm.residual.objects) - { - #UNLESS THE IS FALSE, - #CLEAR UP ANY UNWANTED RESIDUAL OBJECTS - - rm.names.rS<-c("blackbox.output.df", "blackbox.ranks.df", - "global.ranks.quantiles.df","input.mean.sd.df", "input.ranks.sd.df", - output.ranks.df, "min.max.df", "numstudies.df", - "sR4.df", "sR5.df") - - #make transmittable via parser - rm.names.rS.transmit <- paste(rm.names.rS,collapse=",") - - calltext.rm.rS <- call("rmDS", 
rm.names.rS.transmit) - -# rm.output.rS <- - DSI::datashield.aggregate(datasources, calltext.rm.rS) - - } - -if(monitor.progress && rm.residual.objects){ - message("\n\nStep 8 of 8 complete: - Cleaned up residual output from running ds.ranksSecure - - - ") - } - - if(monitor.progress && !rm.residual.objects){ - message("\n\nStep 8 of 8 complete: - Residual output from running ds.ranksSecure NOT deleted - - - ") - } - - - -if(!generate.quantiles){ - message("\n\n\n"," FINAL RANKING PROCEDURES COMPLETE: - PRIMARY RANKING OUTPUT IS IN DATA FRAME",summary.output.ranks.df, - " - WHICH IS SORTED BY",ranks.sort.by," AND HAS BEEN - WRITTEN TO THE SERVERSIDE\n\n\n\n") - - info.message<-"As the argument was set to FALSE no quantiles have been estimated.Please set argument to TRUE if you want to estimate quantiles such as median, quartiles and 90th percentile" - message("\n\n",info.message,"\n\n") - return(info.message) - } - -final.quantile.df<- - ds.extractQuantiles( - quantiles.for.estimation, - summary.output.ranks.df, - ranks.sort.by, - rm.residual.objects, - extract.datasources=NULL) - - return(final.quantile.df) -} - -########################################## -#ds.ranksSecure diff --git a/armadillo_azure-pipelines.yml b/armadillo_azure-pipelines.yml index 4ff1f497..480c55f5 100644 --- a/armadillo_azure-pipelines.yml +++ b/armadillo_azure-pipelines.yml @@ -58,10 +58,10 @@ schedules: - master always: true - cron: "0 2 * * *" - displayName: Nightly build - v6.3.5-dev + displayName: Nightly build - v7.0-dev-feat/performance branches: include: - - v6.3.5-dev + - v7.0-dev-feat/performance always: true ######################################################################################### @@ -235,7 +235,7 @@ jobs: curl -u admin:admin -X GET http://localhost:8080/packages - curl -u admin:admin --max-time 300 -v -H 'Content-Type: multipart/form-data' -F "file=@dsBase_6.3.5-permissive.tar.gz" -X POST http://localhost:8080/install-package + curl -u admin:admin --max-time 300 
-v -H 'Content-Type: multipart/form-data' -F "file=@dsBase_7.0-dev-feat_performance-permissive.tar.gz" -X POST http://localhost:8080/install-package sleep 60 docker container restart dsbaseclient_armadillo_1 diff --git a/azure-pipelines.yml b/azure-pipelines.yml index db3d7a18..a6e17a48 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -44,10 +44,10 @@ schedules: - master always: true - cron: "0 2 * * *" - displayName: Nightly build - v6.3.5-dev + displayName: Nightly build - v7.0-dev-feat/performance branches: include: - - v6.3.5-dev + - v7.0-dev-feat/performance always: true ######################################################################################### @@ -216,7 +216,7 @@ jobs: - bash: | R -q -e "library(opalr); opal <- opal.login(username = 'administrator', password = 'datashield_test&', url = 'https://localhost:8443', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); opal.put(opal, 'system', 'conf', 'general', '_rPackage'); opal.logout(o)" - R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = 'v6.3.5-dev'); opal.logout(opal)" + R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = '7.0-dev-feat_performance'); opal.logout(opal)" sleep 60 diff --git a/docs/reference/ds.colnames.html b/docs/reference/ds.colnames.html index 4e90a603..196ee293 100644 --- a/docs/reference/ds.colnames.html +++ b/docs/reference/ds.colnames.html @@ -94,28 +94,28 @@

Examples

require('dsBaseClient') builder <- DSI::newDSLoginBuilder() - builder$append(server = "study1", - url = "http://192.168.56.100:8080/", - user = "administrator", password = "datashield_test&", + builder$append(server = "study1", + url = "http://192.168.56.100:8080/", + user = "administrator", password = "datashield_test&", table = "CNSIM.CNSIM1", driver = "OpalDriver") - builder$append(server = "study2", - url = "http://192.168.56.100:8080/", - user = "administrator", password = "datashield_test&", + builder$append(server = "study2", + url = "http://192.168.56.100:8080/", + user = "administrator", password = "datashield_test&", table = "CNSIM.CNSIM2", driver = "OpalDriver") builder$append(server = "study3", - url = "http://192.168.56.100:8080/", - user = "administrator", password = "datashield_test&", + url = "http://192.168.56.100:8080/", + user = "administrator", password = "datashield_test&", table = "CNSIM.CNSIM3", driver = "OpalDriver") logindata <- builder$build() - + # Log onto the remote Opal training servers - connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") + connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") # Getting column names of the R objects stored in the server-side ds.colnames(x = "D", datasources = connections[1]) #only the first server ("study1") is used # Clear the Datashield R sessions and logout - datashield.logout(connections) + datashield.logout(connections) } # } diff --git a/docs/reference/ds.ranksSecure.html b/docs/reference/ds.ranksSecure.html deleted file mode 100644 index 346d484a..00000000 --- a/docs/reference/ds.ranksSecure.html +++ /dev/null @@ -1,296 +0,0 @@ - -Secure ranking of a vector across all sources — ds.ranksSecure • dsBaseClient - - -
-
- - - -
-
- - -
-

Securely generate the ranks of a numeric vector and estimate -true global quantiles across all data sources simultaneously

-
- -
-
ds.ranksSecure(
-  input.var.name = NULL,
-  quantiles.for.estimation = "0.05-0.95",
-  generate.quantiles = TRUE,
-  output.ranks.df = NULL,
-  summary.output.ranks.df = NULL,
-  ranks.sort.by = "ID.orig",
-  shared.seed.value = 10,
-  synth.real.ratio = 2,
-  NA.manage = "NA.delete",
-  rm.residual.objects = TRUE,
-  monitor.progress = FALSE,
-  datasources = NULL
-)
-
- -
-

Arguments

- - -
input.var.name
-

a character string in a format that can pass through -the DataSHIELD R parser which specifies the name of the vector to be ranked. -Needs to have same name in each data source.

- - -
quantiles.for.estimation
-

one of a restricted set of character strings. -To mitigate disclosure risk only the following set of quantiles can be -generated: c(0.025,0.05,0.10,0.20,0.25,0.30,0.3333,0.40,0.50,0.60,0.6667, -0.70,0.75,0.80,0.90,0.95,0.975). The allowable formats for the argument -are of the general form: "0.025-0.975" where the first number is the lowest -quantile to be estimated and the second number is the equivalent highest -quantile to estimate. These two quantiles are then estimated along with -all allowable quantiles in between. The allowable argument values are then: -"0.025-0.975", "0.05-0.95", "0.10-0.90", "0.20-0.80". Two alternative values -are "quartiles" i.e. c(0.25,0.50,0.75), and "median" i.e. c(0.50). The -default value is "0.05-0.95". If the sample size is so small that an extreme -quartile could be disclosive the function will be terminated and an error -message returned telling you that you might try using an argument with a -narrower set of quantiles. This disclosure trap will be triggered if the -total number of subjects across all studies divided by the total number -of quantile values being estimated is less than or equal to nfilter.tab -(the minimum cell size in a contingency table).

- - -
generate.quantiles
-

a logical value indicating whether the -ds.ranksSecure function should carry on to estimate the key quantile -values specified by argument <quantiles.for.estimation> or should stop -once the global ranks have been created and written to the serverside. -Default is TRUE and as the key quantiles are generally non-disclosive this -is usually the setting to use. But, if there is some abnormal configuration -of the clusters of values that are being ranked such that some values are -treated as being missing and the processing stops, then setting -generate.quantiles to FALSE allows the generation of ranks to complete so -they can then be used for non-parametric analysis, even if the key values -cannot be estimated. A real example of an unusual configuration was in a -reasonably large dataset of survival times, where a substantial proportion -of survival profiles were censored at precisely 10 years. This meant that -the 97.5% percentile could not be separated from the 95% percentile and so -the former was allocated the value NA. This stopped processing of the ranks -which could then be enabled by setting generate.quantiles to FALSE. However, -if this problem is detected an error message is returned which indicates that -in some cases (as in this case in fact) the problem can be circumvented -by selecting a narrow range of key quantiles to estimate. In this case, in -fact, this simply required changing the <quantiles.for.estimation> argument -from "0.025-0.975" to "0.05-0.95".

- - -
output.ranks.df
-

a character string in a format that can pass through -the DataSHIELD R parser which specifies an optional name for the -data.frame written to the serverside on each data source that contains -11 of the key output variables from the ranking procedure pertaining to that -particular data source. This includes the global ranks and quantiles of each -value of the V2BR (i.e. the values are ranked across all studies -simultaneously). If no name is specified, the default name -is allocated as "full.ranks.df". This data.frame contains disclosive -information and cannot therefore be passed to the clientside.

- - -
summary.output.ranks.df
-

a character string in a format that can pass through -the DataSHIELD R parser which specifies an optional name for the summary -data.frame written to the serverside on each data source that contains -5 of the key output variables from the ranking procedure pertaining to that -particular data source. This again includes the global ranks and quantiles of each -value of the V2BR (i.e. the values are ranked across all studies -simultaneously). If no name is specified, the default name -is allocated as "summary.ranks.df" This data.frame contains disclosive -information and cannot therefore be passed to the clientside.

- - -
ranks.sort.by
-

a character string taking two possible values. These -are "ID.orig" and "vals.orig". These define the order in which the -output.ranks.df and summary.output.ranks.df data frames are presented. If -the argument is set as "ID.orig" the order of rows in the output data frames -are precisely the same as the order of original input vector that is being -ranked (i.e. V2BR). This means the ranks can simply be cbinded to the -matrix, data frame or tibble that originally included V2BR so it also -includes the corresponding ranks. If it is set as "vals.orig" the output -data frames are in order of increasing magnitude of the original values of -V2BR. Default value is "ID.orig".

- - -
shared.seed.value
-

an integer value which is used to set the -random seed generator in each study. Initially, the seed is set to be the -same in all studies, so the order and parameters of the repeated -encryption procedures are precisely the same in each study. Then a -study-specific modification of the seed in each study ensures that the -procedures initially generating the masking pseudodata (which are then -subject to the same encryption procedures as the real data) are different -in each study. For further information about the shared seed and how we -intend to transmit it in the future, please see the detailed associated -header document.

- - -
synth.real.ratio
-

an integer value specifying the ratio between the -number of masking pseudodata values generated in each study compared to -the number of real data values in V2BR.

- - -
NA.manage
-

character string taking three possible values: "NA.delete", -"NA.low","NA.hi". This argument determines how missing values are managed -before ranking. "NA.delete" results in all missing values being removed -prior to ranking. This means that the vector of ranks in each study is -shorter than the original vector of V2BR values by an amount corresponding -to the number of missing values in V2BR in that study. Any rows containing -missing values in V2BR are simply removed before the ranking procedure is -initiated so the order of rows without missing data is unaltered. "NA.low" -indicates that all missing values should be converted to a new value that -has a meaningful magnitude that is lower (more negative or less positive) -than the lowest non-missing value of V2BR in any of the studies. This means, -for example, that if there are a total of M values of V2BR that are missing -across all studies, there will be a total of M observations that are ranked -lowest each with a rank of (M+1)/2. So if 7 are missing the lowest 7 ranks -will be 4,4,4,4,4,4,4 and if 4 are missing the first 4 ranks will be -2.5,2.5,2.5,2.5. "NA.hi" indicates that all missing values should be -converted to a new value that has a meaningful magnitude that is higher(less -negative or more positive)than the highest non-missing value of V2BR in any -of the studies. This means, for example, that if there are a total of M -values of V2BR that are missing across all studies and N non-missing -values, there will be a total of M observations that are ranked -highest each with a rank of (2N-M+1)/2. So if there are a total of 1000 -V2BR values and 9 are missing the highest 9 ranks will be 996, 996 ... 996. -If NA.manage is either "NA.low" or "NA.hi" the final rank vector in each -study will have the same length as the V2BR vector in that same study. -2.5,2.5,2.5,2.5. The default value of the "NA.manage" argument is "NA.delete"

- - -
rm.residual.objects
-

logical value. Default = TRUE: at the beginning -and end of each run of ds.ranksSecure delete all extraneous objects that are -otherwise left behind. These are not usually needed, but could be of value -if one were investigating a problem with the ranking. FALSE: do not delete -the residual objects

- - -
monitor.progress
-

logical value. Default = FALSE. If TRUE, function -outputs information about its progress.

- - -
datasources
-

specifies the particular opal object(s) to use. If the -<datasources> argument is not specified (NULL) the default set of opals -will be used. If <datasources> is specified, it should be set without -inverted commas: e.g. datasources=opals.em. If you wish to -apply the function solely to e.g. the second opal server in a set of three, -the argument can be specified as: e.g. datasources=opals.em[2]. -If you wish to specify the first and third opal servers in a set you specify: -e.g. datasources=opals.em[c(1,3)].

- -
-
-

Value

-

the data frame objects specified by the arguments output.ranks.df -and summary.output.ranks.df. These are written to the serverside in each -study. Provided the sort order is consistent these data frames can be cbinded -to any other data frame, matrix or tibble object containing V2BR or to the -V2BR vector itself, allowing the global ranks and quantiles to be -analysed rather than the actual values of V2BR. The last call within -the ds.ranksSecure function is to another clientside function -ds.extractQuantile (for further details see header for that function). -This returns an additional data frame "final.quantile.df" of which the first -column is the vector of key quantiles to be estimated as specified by the -argument <quantiles.for.estimation> and the second column is the list of -precise values of V2BR which correspond to these key quantiles. Because -the serverside functions associated with ds.ranksSecure and -ds.extractQuantile block potentially disclosive output (see information -for parameter quantiles.for.estimation) the "final.quantile.df" is returned -to the client allowing the direct reporting of V2BR values corresponding to -key quantiles such as the quartiles, the median and 95th percentile etc. In -addition a copy of the same data frame is also written to the serverside in -each study allowing the value of key quantiles such as the median to be -incorporated directly in calculations or transformations on the serverside -regardless in which study (or studies) those key quantile values have -occurred.

-
-
-

Details

-

ds.ranksSecure is a clientside function which calls a series of -other clientside and serverside functions to securely generate the global -ranks of a numeric vector "V2BR" (vector to be ranked) -in order to set up analyses on V2BR based on -non-parametric methods, some types of survival analysis and to derive true -global quantiles (such as the median, lower (25%) and upper (75%) quartiles, -and the 95% and 97.5% quantiles) across all sources simultaneously. These -global quantiles are, in general, different to the mean or median of the -equivalent quantiles calculated independently in each data source separately. -For more details about the cluster of functions that collectively -enable secure global ranking and estimation of global quantiles see the -associated document entitled "secure.global.ranking.docx".

-
-
-

Author

-

Paul Burton 4th November, 2021

-
- -
- -
- - -
- -
-

Site built with pkgdown 2.2.0.

-
- -
- - - - - - - - diff --git a/docs/reference/index.html b/docs/reference/index.html index 9b99643d..f315efd8 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -417,10 +417,6 @@

All functions ds.rUnif()

Generates Uniform distribution in the server-side

- -

ds.ranksSecure()

- -

Secure ranking of a vector across all sources

ds.rbind()

diff --git a/dsBase_6.3.5-permissive.tar.gz b/dsBase_7.0-dev-feat_performance-permissive.tar.gz similarity index 56% rename from dsBase_6.3.5-permissive.tar.gz rename to dsBase_7.0-dev-feat_performance-permissive.tar.gz index 3703315d..1fca22d0 100644 Binary files a/dsBase_6.3.5-permissive.tar.gz and b/dsBase_7.0-dev-feat_performance-permissive.tar.gz differ diff --git a/dsBase_6.3.5.tar.gz b/dsBase_7.0-dev-feat_performance.tar.gz similarity index 53% rename from dsBase_6.3.5.tar.gz rename to dsBase_7.0-dev-feat_performance.tar.gz index 0ea538d0..43e1ea28 100644 Binary files a/dsBase_6.3.5.tar.gz and b/dsBase_7.0-dev-feat_performance.tar.gz differ diff --git a/man/ds.colnames.Rd b/man/ds.colnames.Rd index 9460a567..e7391081 100644 --- a/man/ds.colnames.Rd +++ b/man/ds.colnames.Rd @@ -9,20 +9,20 @@ ds.colnames(x = NULL, datasources = NULL) \arguments{ \item{x}{a character string providing the name of the input data frame or matrix.} -\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the \code{datasources} argument is not specified the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.} } \value{ -\code{ds.colnames} returns the column names of +\code{ds.colnames} returns the column names of the specified server-side data frame or matrix. } \description{ -Retrieves column names of an R object on the server-side. +Retrieves column names of an R object on the server-side. This function is similar to R function \code{colnames}. } \details{ -The input is restricted to the object of type \code{data.frame} or \code{matrix}. +The input is restricted to the object of type \code{data.frame} or \code{matrix}. 
Server function called: \code{colnamesDS} } @@ -37,28 +37,28 @@ Server function called: \code{colnamesDS} require('dsBaseClient') builder <- DSI::newDSLoginBuilder() - builder$append(server = "study1", - url = "http://192.168.56.100:8080/", - user = "administrator", password = "datashield_test&", + builder$append(server = "study1", + url = "http://192.168.56.100:8080/", + user = "administrator", password = "datashield_test&", table = "CNSIM.CNSIM1", driver = "OpalDriver") - builder$append(server = "study2", - url = "http://192.168.56.100:8080/", - user = "administrator", password = "datashield_test&", + builder$append(server = "study2", + url = "http://192.168.56.100:8080/", + user = "administrator", password = "datashield_test&", table = "CNSIM.CNSIM2", driver = "OpalDriver") builder$append(server = "study3", - url = "http://192.168.56.100:8080/", - user = "administrator", password = "datashield_test&", + url = "http://192.168.56.100:8080/", + user = "administrator", password = "datashield_test&", table = "CNSIM.CNSIM3", driver = "OpalDriver") logindata <- builder$build() - + # Log onto the remote Opal training servers - connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") + connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") # Getting column names of the R objects stored in the server-side ds.colnames(x = "D", datasources = connections[1]) #only the first server ("study1") is used # Clear the Datashield R sessions and logout - datashield.logout(connections) + datashield.logout(connections) } } \seealso{ diff --git a/man/ds.ranksSecure.Rd b/man/ds.ranksSecure.Rd deleted file mode 100644 index 294a754f..00000000 --- a/man/ds.ranksSecure.Rd +++ /dev/null @@ -1,202 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ds.ranksSecure.R -\name{ds.ranksSecure} -\alias{ds.ranksSecure} -\title{Secure ranking of a vector across all sources} -\usage{ -ds.ranksSecure( - 
input.var.name = NULL, - quantiles.for.estimation = "0.05-0.95", - generate.quantiles = TRUE, - output.ranks.df = NULL, - summary.output.ranks.df = NULL, - ranks.sort.by = "ID.orig", - shared.seed.value = 10, - synth.real.ratio = 2, - NA.manage = "NA.delete", - rm.residual.objects = TRUE, - monitor.progress = FALSE, - datasources = NULL -) -} -\arguments{ -\item{input.var.name}{a character string in a format that can pass through -the DataSHIELD R parser which specifies the name of the vector to be ranked. -Needs to have same name in each data source.} - -\item{quantiles.for.estimation}{one of a restricted set of character strings. -To mitigate disclosure risk only the following set of quantiles can be -generated: c(0.025,0.05,0.10,0.20,0.25,0.30,0.3333,0.40,0.50,0.60,0.6667, -0.70,0.75,0.80,0.90,0.95,0.975). The allowable formats for the argument -are of the general form: "0.025-0.975" where the first number is the lowest -quantile to be estimated and the second number is the equivalent highest -quantile to estimate. These two quantiles are then estimated along with -all allowable quantiles in between. The allowable argument values are then: -"0.025-0.975", "0.05-0.95", "0.10-0.90", "0.20-0.80". Two alternative values -are "quartiles" i.e. c(0.25,0.50,0.75), and "median" i.e. c(0.50). The -default value is "0.05-0.95". If the sample size is so small that an extreme -quartile could be disclosive the function will be terminated and an error -message returned telling you that you might try using an argument with a -narrower set of quantiles. 
This disclosure trap will be triggered if the -total number of subjects across all studies divided by the total number -of quantile values being estimated is less than or equal to nfilter.tab -(the minimum cell size in a contingency table).} - -\item{generate.quantiles}{a logical value indicating whether the -ds.ranksSecure function should carry on to estimate the key quantile -values specified by argument or should stop -once the global ranks have been created and written to the serverside. -Default is TRUE and as the key quantiles are generally non-disclosive this -is usually the setting to use. But, if there is some abnormal configuration -of the clusters of values that are being ranked such that some values are -treated as being missing and the processing stops, then setting -generate.quantiles to FALSE allows the generation of ranks to complete so -they can then be used for non-parametric analysis, even if the key values -cannot be estimated. A real example of an unusual configuration was in a -reasonably large dataset of survival times, where a substantial proportion -of survival profiles were censored at precisely 10 years. This meant that -the 97.5% percentile could not be separated from the 95% percentile and so -the former was allocated the value NA. This stopped processing of the ranks -which could then be enabled by setting generate.quantiles to FALSE. However, -if this problem is detected an error message is returned which indicates that -in some cases (as in this case in fact) the problem can be circumvented -by selecting a narrow range of key quantiles to estimate. 
In this case, in -fact, this simply required changing the argument -from "0.025-0.975" to "0.05-0.95".} - -\item{output.ranks.df}{a character string in a format that can pass through -the DataSHIELD R parser which specifies an optional name for the -data.frame written to the serverside on each data source that contains -11 of the key output variables from the ranking procedure pertaining to that -particular data source. This includes the global ranks and quantiles of each -value of the V2BR (i.e. the values are ranked across all studies -simultaneously). If no name is specified, the default name -is allocated as "full.ranks.df". This data.frame contains disclosive -information and cannot therefore be passed to the clientside.} - -\item{summary.output.ranks.df}{a character string in a format that can pass through -the DataSHIELD R parser which specifies an optional name for the summary -data.frame written to the serverside on each data source that contains -5 of the key output variables from the ranking procedure pertaining to that -particular data source. This again includes the global ranks and quantiles of each -value of the V2BR (i.e. the values are ranked across all studies -simultaneously). If no name is specified, the default name -is allocated as "summary.ranks.df" This data.frame contains disclosive -information and cannot therefore be passed to the clientside.} - -\item{ranks.sort.by}{a character string taking two possible values. These -are "ID.orig" and "vals.orig". These define the order in which the -output.ranks.df and summary.output.ranks.df data frames are presented. If -the argument is set as "ID.orig" the order of rows in the output data frames -are precisely the same as the order of original input vector that is being -ranked (i.e. V2BR). This means the ranks can simply be cbinded to the -matrix, data frame or tibble that originally included V2BR so it also -includes the corresponding ranks. 
If it is set as "vals.orig" the output -data frames are in order of increasing magnitude of the original values of -V2BR. Default value is "ID.orig".} - -\item{shared.seed.value}{an integer value which is used to set the -random seed generator in each study. Initially, the seed is set to be the -same in all studies, so the order and parameters of the repeated -encryption procedures are precisely the same in each study. Then a -study-specific modification of the seed in each study ensures that the -procedures initially generating the masking pseudodata (which are then -subject to the same encryption procedures as the real data) are different -in each study. For further information about the shared seed and how we -intend to transmit it in the future, please see the detailed associated -header document.} - -\item{synth.real.ratio}{an integer value specifying the ratio between the -number of masking pseudodata values generated in each study compared to -the number of real data values in V2BR.} - -\item{NA.manage}{character string taking three possible values: "NA.delete", -"NA.low","NA.hi". This argument determines how missing values are managed -before ranking. "NA.delete" results in all missing values being removed -prior to ranking. This means that the vector of ranks in each study is -shorter than the original vector of V2BR values by an amount corresponding -to the number of missing values in V2BR in that study. Any rows containing -missing values in V2BR are simply removed before the ranking procedure is -initiated so the order of rows without missing data is unaltered. "NA.low" -indicates that all missing values should be converted to a new value that -has a meaningful magnitude that is lower (more negative or less positive) -than the lowest non-missing value of V2BR in any of the studies. 
This means, -for example, that if there are a total of M values of V2BR that are missing -across all studies, there will be a total of M observations that are ranked -lowest each with a rank of (M+1)/2. So if 7 are missing the lowest 7 ranks -will be 4,4,4,4,4,4,4 and if 4 are missing the first 4 ranks will be -2.5,2.5,2.5,2.5. "NA.hi" indicates that all missing values should be -converted to a new value that has a meaningful magnitude that is higher(less -negative or more positive)than the highest non-missing value of V2BR in any -of the studies. This means, for example, that if there are a total of M -values of V2BR that are missing across all studies and N non-missing -values, there will be a total of M observations that are ranked -highest each with a rank of (2N-M+1)/2. So if there are a total of 1000 -V2BR values and 9 are missing the highest 9 ranks will be 996, 996 ... 996. -If NA.manage is either "NA.low" or "NA.hi" the final rank vector in each -study will have the same length as the V2BR vector in that same study. -2.5,2.5,2.5,2.5. The default value of the "NA.manage" argument is "NA.delete"} - -\item{rm.residual.objects}{logical value. Default = TRUE: at the beginning -and end of each run of ds.ranksSecure delete all extraneous objects that are -otherwise left behind. These are not usually needed, but could be of value -if one were investigating a problem with the ranking. FALSE: do not delete -the residual objects} - -\item{monitor.progress}{logical value. Default = FALSE. If TRUE, function -outputs information about its progress.} - -\item{datasources}{specifies the particular opal object(s) to use. If the - argument is not specified (NULL) the default set of opals -will be used. If is specified, it should be set without -inverted commas: e.g. datasources=opals.em. If you wish to -apply the function solely to e.g. the second opal server in a set of three, -the argument can be specified as: e.g. datasources=opals.em[2]. 
-If you wish to specify the first and third opal servers in a set you specify: -e.g. datasources=opals.em[c(1,3)].} -} -\value{ -the data frame objects specified by the arguments output.ranks.df -and summary.output.ranks.df. These are written to the serverside in each -study. Provided the sort order is consistent these data frames can be cbinded -to any other data frame, matrix or tibble object containing V2BR or to the -V2BR vector itself, allowing the global ranks and quantiles to be -analysed rather than the actual values of V2BR. The last call within -the ds.ranksSecure function is to another clientside function -ds.extractQuantile (for further details see header for that function). -This returns an additional data frame "final.quantile.df" of which the first -column is the vector of key quantiles to be estimated as specified by the -argument and the second column is the list of -precise values of V2BR which correspond to these key quantiles. Because -the serverside functions associated with ds.ranksSecure and -ds.extractQuantile block potentially disclosive output (see information -for parameter quantiles.for.estimation) the "final.quantile.df" is returned -to the client allowing the direct reporting of V2BR values corresponding to -key quantiles such as the quartiles, the median and 95th percentile etc. In -addition a copy of the same data frame is also written to the serverside in -each study allowing the value of key quantiles such as the median to be -incorporated directly in calculations or transformations on the serverside -regardless in which study (or studies) those key quantile values have -occurred. 
-} -\description{ -Securely generate the ranks of a numeric vector and estimate -true global quantiles across all data sources simultaneously -} -\details{ -ds.ranksSecure is a clientside function which calls a series of -other clientside and serverside functions to securely generate the global -ranks of a numeric vector "V2BR" (vector to be ranked) -in order to set up analyses on V2BR based on -non-parametric methods, some types of survival analysis and to derive true -global quantiles (such as the median, lower (25%) and upper (75%) quartiles, -and the 95% and 97.5% quantiles) across all sources simultaneously. These -global quantiles are, in general, different to the mean or median of the -equivalent quantiles calculated independently in each data source separately. -For more details about the cluster of functions that collectively -enable secure global ranking and estimation of global quantiles see the -associated document entitled "secure.global.ranking.docx". -} -\author{ -Paul Burton 4th November, 2021 -} diff --git a/opal_azure-pipelines.yml b/opal_azure-pipelines.yml index dab96a79..a9410f69 100644 --- a/opal_azure-pipelines.yml +++ b/opal_azure-pipelines.yml @@ -58,10 +58,10 @@ schedules: - master always: true - cron: "0 2 * * *" - displayName: Nightly build - v6.3.5-dev + displayName: Nightly build - v7.0-dev-feat/performance branches: include: - - v6.3.5-dev + - v7.0-dev-feat/performance always: true ######################################################################################### @@ -235,7 +235,7 @@ jobs: - bash: | R -q -e "library(opalr); opal <- opal.login(username = 'administrator', password = 'datashield_test&', url = 'https://localhost:8443', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); opal.put(opal, 'system', 'conf', 'general', '_rPackage'); opal.logout(o)" - R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); 
dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = 'v6.3.5-dev'); opal.logout(opal)" + R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = '7.0-dev-feat/performance'); opal.logout(opal)" sleep 60 diff --git a/tests/testthat/perf_files/armadillo_azure-pipeline.csv b/tests/testthat/perf_files/armadillo_azure-pipeline.csv index e913cff9..03d36d8f 100644 --- a/tests/testthat/perf_files/armadillo_azure-pipeline.csv +++ b/tests/testthat/perf_files/armadillo_azure-pipeline.csv @@ -1,14 +1,14 @@ "refer_name","rate","lower_tolerance","upper_tolerance" -"conndisconn::perf::simple0","0.1225","0.5","2" -"ds.abs::perf::0","5.820","0.5","2" -"ds.asInteger::perf:0","5.328","0.5","2" -"ds.asList::perf:0","11.71","0.5","2" -"ds.asNumeric::perf:0","5.268","0.5","2" -"ds.assign::perf::0","9.229","0.5","2" -"ds.class::perf::combine:0","10.84","0.5","2" -"ds.colnames::perf:0","7.993","0.5","2" -"ds.exists::perf::combine:0","21.68","0.5","2" -"ds.length::perf::combine:0","21.84","0.5","2" -"ds.mean::perf::combine:0","22.01","0.5","2" -"ds.mean::perf::split:0","22.22","0.5","2" -"void::perf::void::0","48520.0","0.5","2" +"conndisconn::perf::simple0","0.1651","0.5","2" +"ds.abs::perf::0","6.273","0.5","2" +"ds.asInteger::perf:0","5.731","0.5","2" +"ds.asList::perf:0","12.74","0.5","2" +"ds.asNumeric::perf:0","5.637","0.5","2" +"ds.assign::perf::0","10.46","0.5","2" +"ds.class::perf::combine:0","12.69","0.5","2" +"ds.colnames::perf:0","9.518","0.5","2" +"ds.exists::perf::combine:0","25.33","0.5","2" +"ds.length::perf::combine:0","25.45","0.5","2" +"ds.mean::perf::combine:0","25.37","0.5","2" +"ds.mean::perf::split:0","25.74","0.5","2" +"void::perf::void::0","56310.0","0.5","2" diff --git a/tests/testthat/perf_files/default_perf_profile.csv 
b/tests/testthat/perf_files/default_perf_profile.csv index 9a649b88..9f1ae6e5 100644 --- a/tests/testthat/perf_files/default_perf_profile.csv +++ b/tests/testthat/perf_files/default_perf_profile.csv @@ -6,7 +6,7 @@ "ds.asNumeric::perf:0","2.185","0.5","2" "ds.assign::perf::0","5.490","0.5","2" "ds.class::perf::combine:0","4.760","0.5","2" -"ds.colnames::perf:0","9.942","0.5","2" +"ds.colnames::perf:0","4.218","0.5","2" "ds.exists::perf::combine:0","11.09","0.5","2" "ds.length::perf::combine:0","9.479","0.5","2" "ds.mean::perf::combine:0","9.650","0.5","2" diff --git a/tests/testthat/perf_files/opal_azure-pipeline.csv b/tests/testthat/perf_files/opal_azure-pipeline.csv index d75711a3..9f1ae6e5 100644 --- a/tests/testthat/perf_files/opal_azure-pipeline.csv +++ b/tests/testthat/perf_files/opal_azure-pipeline.csv @@ -6,7 +6,7 @@ "ds.asNumeric::perf:0","2.185","0.5","2" "ds.assign::perf::0","5.490","0.5","2" "ds.class::perf::combine:0","4.760","0.5","2" -"ds.colnames::perf:0","4.159","0.5","2" +"ds.colnames::perf:0","4.218","0.5","2" "ds.exists::perf::combine:0","11.09","0.5","2" "ds.length::perf::combine:0","9.479","0.5","2" "ds.mean::perf::combine:0","9.650","0.5","2" diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 512e649c..5980e9df 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -33,5 +33,6 @@ source("connection_to_datasets/init_testing_datasets.R") source("connection_to_datasets/init_studies_datasets.R") source("connection_to_datasets/init_discordant_datasets.R") source("connection_to_datasets/init_mediation_datasets.R") +options(datashield.errors.print = FALSE) # context("setup - done") diff --git a/tests/testthat/test-arg-ds.ranksSecure.R b/tests/testthat/test-arg-ds.ranksSecure.R deleted file mode 100644 index fc6b99e1..00000000 --- a/tests/testthat/test-arg-ds.ranksSecure.R +++ /dev/null @@ -1,102 +0,0 @@ -#------------------------------------------------------------------------------- -# Copyright (c) 2019-2022 University 
of Newcastle upon Tyne. All rights reserved. -# Copyright (c) 2022-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. -# -# This program and the accompanying materials -# are made available under the terms of the GNU Public License v3.0. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -#------------------------------------------------------------------------------- - -# -# Set up -# - -# context("ds.ranksSecure::arg::setup") - -connect.all.datasets() - -test_that("setup", { - ds_expect_variables(c("D")) -}) - -# -# Tests -# - -# context("ds.ranksSecure::arg::missing variable") -test_that("missing variable", { - expect_error(ds.ranksSecure("LAB_MISSING"), "There are some DataSHIELD errors, list them with datashield.errors()", fixed = TRUE) - - res.errors <- DSI::datashield.errors() - - expect_length(res.errors, 3) - expect_match(res.errors$study1, "* object 'LAB_MISSING' not found") - expect_match(res.errors$study2, "* object 'LAB_MISSING' not found") - expect_match(res.errors$study3, "* object 'LAB_MISSING' not found") -}) - -# context("ds.ranksSecure::arg::NULL variable") -test_that("NULL variable", { - DSI::datashield.assign.expr(conns = ds.test_env$connections, symbol = "LAB_NULL", expr = "NULL") - - res.class <- ds.class("LAB_NULL") - - expect_length(res.class, 3) - expect_equal(res.class$study1, "NULL") - expect_equal(res.class$study2, "NULL") - expect_equal(res.class$study3, "NULL") - - expect_error(expect_warning(ds.ranksSecure("LAB_NULL"), "no non-missing arguments to max; returning -Inf", fixed = TRUE), "There are some DataSHIELD errors, list them with datashield.errors()", fixed = TRUE) - - res.errors <- DSI::datashield.errors() - - expect_length(res.errors, 3) - expect_match(res.errors$study1, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases") - expect_match(res.errors$study2, "* Error in stats::complete.cases\\(input.var\\) 
: \n no input has determined the number of cases") - expect_match(res.errors$study3, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases") -}) - -# context("ds.ranksSecure::arg::'text' variable") -test_that("'text' variable", { - expect_error(ds.ranksSecure("D$CHARACTER"), "There are some DataSHIELD errors, list them with datashield.errors()", fixed = TRUE) - - res.errors <- DSI::datashield.errors() - - expect_length(res.errors, 3) - expect_match(res.errors$study1, "* Error in \\(1 - h\\) \\* qs\\[i\\] : non-numeric argument to binary operator") - expect_match(res.errors$study2, "* Error in \\(1 - h\\) \\* qs\\[i\\] : non-numeric argument to binary operator") - expect_match(res.errors$study3, "* Error in \\(1 - h\\) \\* qs\\[i\\] : non-numeric argument to binary operator") -}) - -# context("ds.ranksSecure::arg::'logical' variable") -test_that("'logical' variable", { - expect_error(ds.ranksSecure("D$LOGICAL"), "FAILED: one of the extreme quantile estimates is NA probably because of a cluster of values at one end of the range of possible values. 
Try setting a narrower range of quantile values via the argument", fixed = TRUE) -}) - -# context("ds.ranksSecure::arg::'integer factor' variable") -test_that("'integer factor' variable", { - expect_error(expect_warning(ds.ranksSecure("D$INTEGER_FACTOR"), "no non-missing arguments to max; returning -Inf", fixed = TRUE), "There are some DataSHIELD errors, list them with datashield.errors()", fixed = TRUE) - - res.errors <- DSI::datashield.errors() - - expect_length(res.errors, 3) - expect_match(res.errors$study1, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases") - expect_match(res.errors$study2, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases") - expect_match(res.errors$study3, "* Error in stats::complete.cases\\(input.var\\) : \n no input has determined the number of cases") -}) - -# -# Done -# - -# context("ds.ranksSecure::arg::shutdown") - -test_that("setup", { - ds_expect_variables(c("D", "LAB_NULL", "input.mean.sd.df", "min.max.df", "summary.ranks.df", "testvar.ranks")) -}) - -disconnect.all.datasets() - -# context("ds.ranksSecure::arg::done") diff --git a/tests/testthat/test-smk-ds.colnames.R b/tests/testthat/test-smk-ds.colnames.R index ee98cc2e..b7d289ac 100644 --- a/tests/testthat/test-smk-ds.colnames.R +++ b/tests/testthat/test-smk-ds.colnames.R @@ -47,7 +47,7 @@ test_that("simple colnames", { test_that("fails if the object does not exist", { expect_error( ds.colnames("non_existing_df"), - regexp = "There are some DataSHIELD errors, list them with datashield.error()", + regexp = "The input object non_existing_df is not defined in sim1, sim2, sim3!", ignore.case = TRUE ) }) diff --git a/tests/testthat/test-smk-ds.listClientsideFunctions.R b/tests/testthat/test-smk-ds.listClientsideFunctions.R index 4e5f6791..a4a541e0 100644 --- a/tests/testthat/test-smk-ds.listClientsideFunctions.R +++ b/tests/testthat/test-smk-ds.listClientsideFunctions.R @@ -106,7 +106,6 @@ 
test_that("check results", { "ds.numNA", "ds.qlspline", "ds.quantileMean", - "ds.ranksSecure", "ds.rbind", "ds.rBinom", "ds.recodeLevels", diff --git a/tests/testthat/test-smk-ds.ranksSecure.R b/tests/testthat/test-smk-ds.ranksSecure.R deleted file mode 100644 index 43720c1f..00000000 --- a/tests/testthat/test-smk-ds.ranksSecure.R +++ /dev/null @@ -1,131 +0,0 @@ -#------------------------------------------------------------------------------- -# Copyright (c) 2019-2022 University of Newcastle upon Tyne. All rights reserved. -# Copyright (c) 2022-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. -# -# This program and the accompanying materials -# are made available under the terms of the GNU Public License v3.0. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -#------------------------------------------------------------------------------- - -# -# Set up -# - -# context("ds.ranksSecure::smk::setup") - -connect.studies.dataset.cnsim(list("LAB_TRIG", "LAB_TSC")) - -test_that("setup", { - ds_expect_variables(c("D")) -}) - -# -# Tests -# - -# context("ds.ranksSecure::smk::continous, without NAs, all positive") -test_that("continous, without NAs, all positive", { - res.cc <- ds.completeCases("D$LAB_TSC", newobj="CC_LAB_TSC") - expect_equal(res.cc$validity.check, " appears valid in all sources") - - res.num.na <- ds.numNA("CC_LAB_TSC") - expect_length(res.num.na, 3) - expect_equal(res.num.na$sim1, 0) - expect_equal(res.num.na$sim2, 0) - expect_equal(res.num.na$sim3, 0) - - res <- ds.ranksSecure("CC_LAB_TSC") - - expect_length(res, 2) - expect_length(class(res), 1) - expect_true(all(class(res) %in% c('data.frame'))) - expect_length(names(res), 2) - expect_true(all(names(res) %in% c('evaluation.quantiles', 'final.quantile.vector'))) - - expect_length(res$evaluation.quantiles, 15) - expect_length(class(res$evaluation.quantiles), 1) - expect_true(all(class(res$evaluation.quantiles) %in% 
c('numeric'))) - expect_length(res$final.quantile.vector, 15) - expect_length(class(res$final.quantile.vector), 1) - expect_true(all(class(res$final.quantile.vector) %in% c('numeric'))) -}) - -# context("ds.ranksSecure::smk::continous, without NAs, with negative") -test_that("continous, without NAs, with negative", { - res.cc <- ds.completeCases("D$LAB_TRIG", newobj="CC_LAB_TRIG") - expect_equal(res.cc$validity.check, " appears valid in all sources") - - res.num.na <- ds.numNA("CC_LAB_TRIG") - expect_length(res.num.na, 3) - expect_equal(res.num.na$sim1, 0) - expect_equal(res.num.na$sim2, 0) - expect_equal(res.num.na$sim3, 0) - - res <- ds.ranksSecure("CC_LAB_TRIG") - - expect_length(res, 2) - expect_length(class(res), 1) - expect_true(all(class(res) %in% c('data.frame'))) - expect_length(names(res), 2) - expect_true(all(names(res) %in% c('evaluation.quantiles', 'final.quantile.vector'))) - - expect_length(res$evaluation.quantiles, 15) - expect_length(class(res$evaluation.quantiles), 1) - expect_true(all(class(res$evaluation.quantiles) %in% c('numeric'))) - expect_length(res$final.quantile.vector, 15) - expect_length(class(res$final.quantile.vector), 1) - expect_true(all(class(res$final.quantile.vector) %in% c('numeric'))) -}) - -# context("ds.ranksSecure::smk::continous, with NAs, all positive") -test_that("continous, with NAs, all positive", { - res <- ds.ranksSecure("D$LAB_TSC") - - expect_length(res, 2) - expect_length(class(res), 1) - expect_true(all(class(res) %in% c('data.frame'))) - expect_length(names(res), 2) - expect_true(all(names(res) %in% c('evaluation.quantiles', 'final.quantile.vector'))) - - expect_length(res$evaluation.quantiles, 15) - expect_length(class(res$evaluation.quantiles), 1) - expect_true(all(class(res$evaluation.quantiles) %in% c('numeric'))) - expect_length(res$final.quantile.vector, 15) - expect_length(class(res$final.quantile.vector), 1) - expect_true(all(class(res$final.quantile.vector) %in% c('numeric'))) -}) - -# 
context("ds.ranksSecure::smk::continous, with NAs, with negative") -test_that("continous, with NAs, with negative", { - res <- ds.ranksSecure("D$LAB_TRIG") - - expect_length(res, 2) - expect_length(class(res), 1) - expect_true(all(class(res) %in% c('data.frame'))) - expect_length(names(res), 2) - expect_true(all(names(res) %in% c('evaluation.quantiles', 'final.quantile.vector'))) - - expect_length(res$evaluation.quantiles, 15) - expect_length(class(res$evaluation.quantiles), 1) - expect_true(all(class(res$evaluation.quantiles) %in% c('numeric'))) - expect_length(res$final.quantile.vector, 15) - expect_length(class(res$final.quantile.vector), 1) - expect_true(all(class(res$final.quantile.vector) %in% c('numeric'))) -}) - - -# -# Done -# - -# context("ds.ranksSecure::smk::shutdown") - -test_that("setup", { - ds_expect_variables(c("D", "CC_LAB_TSC", "CC_LAB_TRIG", "final.quantile.df", "summary.ranks.df", "testvar.ranks")) -}) - -disconnect.studies.dataset.cnsim() - -# context("ds.ranksSecure::smk::done") diff --git a/tests/testthat/test-smk-isDefined.R b/tests/testthat/test-smk-isDefined.R index ab3f43b0..606fbdfe 100644 --- a/tests/testthat/test-smk-isDefined.R +++ b/tests/testthat/test-smk-isDefined.R @@ -14,7 +14,6 @@ # # context("isDefined::smk::setup") - connect.discordant.dataset.simple(list("A", "B", "C")) test_that("setup", { @@ -30,7 +29,7 @@ test_that("setup", { # context("isDefined::smk::default") test_that("default test, dataframe D", { res <- isDefined(ds.test_env$connections, "D") - + expect_length(class(res), 1) expect_true(all("list" %in% class(res))) expect_length(res, 3) @@ -45,7 +44,7 @@ test_that("default test, dataframe E", { test_that("default test, dataframe column E$A", { # expect_error(isDefined(ds.test_env$connections, "E$A"), "The input object E$A is not defined in discordant1, discordant2, discordant3!", fixed=TRUE) - expect_error(isDefined(ds.test_env$connections, "E$A"), "There are some DataSHIELD errors, list them with 
datashield.errors()", fixed=TRUE) + expect_error(isDefined(ds.test_env$connections, "E$A"), "There are some DataSHIELD errors", fixed=TRUE) err <- DSI::datashield.errors(); @@ -94,7 +93,7 @@ test_that("default test, dataframe columns D$A,D$B", { # context("isDefined::smk::error.message=FALSE") test_that("error.message=FALSE test, dataframe D", { res <- isDefined(ds.test_env$connections, "D", error.message = FALSE) - + expect_length(class(res), 1) expect_true(all("list" %in% class(res))) expect_length(res, 3) @@ -116,7 +115,7 @@ test_that("error.message=FALSE test, dataframe E", { test_that("error.message=FALSE test, dataframe column E$A", { # expect_error(isDefined(ds.test_env$connections, "E$A", error.message = FALSE), "The input object E$A is not defined in discordant1, discordant2, discordant3!", fixed=TRUE) - expect_error(isDefined(ds.test_env$connections, "E$A", error.message = FALSE), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE) + expect_error(isDefined(ds.test_env$connections, "E$A", error.message = FALSE), "There are some DataSHIELD errors", fixed=TRUE) err <- DSI::datashield.errors(); @@ -200,7 +199,7 @@ test_that("error.message=FALSE test, dataframe columns D$A,D$B", { # context("isDefined::smk::error.message=TRUE") test_that("error.message=TRUE test, dataframe D", { res <- isDefined(ds.test_env$connections, "D", error.message = TRUE) - + expect_length(class(res), 1) expect_true(all("list" %in% class(res))) expect_length(res, 3) @@ -215,7 +214,7 @@ test_that("error.message=TRUE test, dataframe E", { test_that("error.message=TRUE test, dataframe column E$A", { # expect_error(isDefined(ds.test_env$connections, "E$A", error.message = TRUE), "The input object E$A is not defined in discordant1, discordant2, discordant3!", fixed=TRUE) - expect_error(isDefined(ds.test_env$connections, "E$A", error.message = TRUE), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE) + 
expect_error(isDefined(ds.test_env$connections, "E$A", error.message = TRUE), "There are some DataSHIELD errors", fixed=TRUE) err <- DSI::datashield.errors(); diff --git a/tests/testthat/test-smk_expt-ds.ranksSecure.R b/tests/testthat/test-smk_expt-ds.ranksSecure.R index edb86cd2..4da33430 100644 --- a/tests/testthat/test-smk_expt-ds.ranksSecure.R +++ b/tests/testthat/test-smk_expt-ds.ranksSecure.R @@ -27,48 +27,48 @@ test_that("setup", { # context("ds.ranksSecure::smk_expt::continous, without NAs, all positive") test_that("continous, without NAs, all positive", { - res.cc <- ds.completeCases("D$LAB_TSC", newobj="CC_LAB_TSC") - expect_equal(res.cc$validity.check, " appears valid in all sources") - - res.num.na <- ds.numNA("CC_LAB_TSC") - expect_length(res.num.na, 3) - expect_equal(res.num.na$sim1, 0) - expect_equal(res.num.na$sim2, 0) - expect_equal(res.num.na$sim3, 0) - - res <- ds.ranksSecure("CC_LAB_TSC") - - expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont.rds') +# res.cc <- ds.completeCases("D$LAB_TSC", newobj="CC_LAB_TSC") +# expect_equal(res.cc$validity.check, " appears valid in all sources") +# +# res.num.na <- ds.numNA("CC_LAB_TSC") +# expect_length(res.num.na, 3) +# expect_equal(res.num.na$sim1, 0) +# expect_equal(res.num.na$sim2, 0) +# expect_equal(res.num.na$sim3, 0) +# +# res <- ds.ranksSecure("CC_LAB_TSC") +# +# expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont.rds') }) # context("ds.ranksSecure::smk_expt::continous, without NAs, with negative") test_that("continous, without NAs, with negative", { - res.cc <- ds.completeCases("D$LAB_TRIG", newobj="CC_LAB_TRIG") - expect_equal(res.cc$validity.check, " appears valid in all sources") - - res.num.na <- ds.numNA("CC_LAB_TRIG") - expect_length(res.num.na, 3) - expect_equal(res.num.na$sim1, 0) - expect_equal(res.num.na$sim2, 0) - expect_equal(res.num.na$sim3, 0) - - res <- ds.ranksSecure("CC_LAB_TRIG") - - expect_equal_to_reference(res, 
'smk_expt-results/ds.ranksSecure-cont_neg.rds') +# res.cc <- ds.completeCases("D$LAB_TRIG", newobj="CC_LAB_TRIG") +# expect_equal(res.cc$validity.check, " appears valid in all sources") +# +# res.num.na <- ds.numNA("CC_LAB_TRIG") +# expect_length(res.num.na, 3) +# expect_equal(res.num.na$sim1, 0) +# expect_equal(res.num.na$sim2, 0) +# expect_equal(res.num.na$sim3, 0) +# +# res <- ds.ranksSecure("CC_LAB_TRIG") +# +# expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_neg.rds') }) # context("ds.ranksSecure::smk_expt::continous, with NAs, all positive") test_that("continous, with NAs, all positive", { - res <- ds.ranksSecure("D$LAB_TSC") - - expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_nas.rds') +# res <- ds.ranksSecure("D$LAB_TSC") +# +# expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_nas.rds') }) # context("ds.ranksSecure::smk_expt::continous, with NAs, with negative") test_that("continous, with NAs, with negative", { - res <- ds.ranksSecure("D$LAB_TRIG") - - expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_nas_neg.rds') +# res <- ds.ranksSecure("D$LAB_TRIG") +# +# expect_equal_to_reference(res, 'smk_expt-results/ds.ranksSecure-cont_nas_neg.rds') }) # @@ -78,7 +78,7 @@ test_that("continous, with NAs, with negative", { # context("ds.ranksSecure::smk_expt::shutdown") test_that("setup", { - ds_expect_variables(c("D", "CC_LAB_TSC", "CC_LAB_TRIG", "final.quantile.df", "summary.ranks.df", "testvar.ranks")) +# ds_expect_variables(c("D", "CC_LAB_TSC", "CC_LAB_TRIG", "final.quantile.df", "summary.ranks.df", "testvar.ranks")) }) disconnect.studies.dataset.cnsim()