diff --git a/Project.toml b/Project.toml
index aaaccc94..62caa680 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,7 +10,6 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ReliabilityDiagrams = "e5f51471-6270-49e4-a15a-f1cfbff4f856"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [compat]
 julia = "1"
diff --git a/examples/batchensemble.jl b/examples/batchensemble.jl
deleted file mode 100644
index cb44d2a5..00000000
--- a/examples/batchensemble.jl
+++ /dev/null
@@ -1,207 +0,0 @@
-## Classification of MNIST dataset
-## with the convolutional neural network known as LeNet5.
-## This script also combines various
-## packages from the Julia ecosystem with Flux.
-using Flux
-using Flux.Data: DataLoader
-using Flux.Optimise: Optimiser, WeightDecay
-using Flux: onehotbatch, onecold, glorot_normal, label_smoothing
-using Flux.Losses: logitcrossentropy
-using Statistics, Random
-using Logging: with_logger
-using TensorBoardLogger: TBLogger, tb_overwrite, set_step!, set_step_increment!
-using ProgressMeter: @showprogress
-import MLDatasets
-import BSON
-using CUDA
-using Formatting
-
-using DeepUncertainty
-
-# LeNet5 "constructor".
-# The model can be adapted to any image size
-# and any number of output classes.
-function LeNet5(args; imgsize = (28, 28, 1), nclasses = 10)
-    out_conv_size = (imgsize[1] ÷ 4 - 3, imgsize[2] ÷ 4 - 3, 16)
-
-    return Chain(
-        ConvBatchEnsemble((5, 5), imgsize[end] => 6, args.rank, args.ensemble_size, relu),
-        MaxPool((2, 2)),
-        ConvBatchEnsemble((5, 5), 6 => 16, args.rank, args.ensemble_size, relu),
-        MaxPool((2, 2)),
-        flatten,
-        DenseBatchEnsemble(prod(out_conv_size), 120, args.rank, args.ensemble_size, relu),
-        DenseBatchEnsemble(120, 84, args.rank, args.ensemble_size, relu),
-        DenseBatchEnsemble(84, nclasses, args.rank, args.ensemble_size),
-    )
-end
-
-function get_data(args)
-    xtrain, ytrain = MLDatasets.MNIST.traindata(Float32)
-    xtest, ytest = MLDatasets.MNIST.testdata(Float32)
-
-    xtrain = reshape(xtrain, 28, 28, 1, :)
-    xtest = reshape(xtest, 28, 28, 1, :)
-
-    ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)
-
-    train_loader = DataLoader(
-        (xtrain, ytrain),
-        batchsize = args.batchsize,
-        shuffle = true,
-        partial = false,
-    )
-    test_loader = DataLoader((xtest, ytest), batchsize = args.batchsize, partial = false)
-
-    return train_loader, test_loader
-end
-
-loss(ŷ, y) = logitcrossentropy(ŷ, y)
-
-function accuracy(preds, labels)
-    acc = sum(onecold(preds |> cpu) .== onecold(labels |> cpu))
-    return acc
-end
-
-function eval_loss_accuracy(args, loader, model, device)
-    l = [0.0f0 for x = 1:args.ensemble_size]
-    acc = [0 for x = 1:args.ensemble_size]
-    ece_list = [0.0f0 for x = 1:args.ensemble_size]
-    ntot = 0
-    mean_l = 0
-    mean_acc = 0
-    mean_ece = 0
-    for (x, y) in loader
-        x = repeat(x, 1, 1, 1, args.ensemble_size)
-        x, y = x |> device, y |> device
-        # Perform the forward pass
-        ŷ = model(x)
-        ŷ = softmax(ŷ, dims = 1)
-        # Reshape the predictions into [classes, batch_size, ensemble_size
-        reshaped_ŷ = reshape(ŷ, size(ŷ)[1], args.batchsize, args.ensemble_size)
-        # Loop through each model's predictions
-        for ensemble = 1:args.ensemble_size
-            model_predictions = reshaped_ŷ[:, :, ensemble]
-            # Calculate individual loss
-            l[ensemble] += loss(model_predictions, y) * size(model_predictions)[end]
-            acc[ensemble] += accuracy(model_predictions, y)
-            ece_list[ensemble] +=
-                ExpectedCalibrationError(model_predictions |> cpu, onecold(y |> cpu)) *
-                args.batchsize
-        end
-        # Get the mean predictions
-        mean_predictions = mean(reshaped_ŷ, dims = ndims(reshaped_ŷ))
-        mean_predictions = dropdims(mean_predictions, dims = ndims(mean_predictions))
-        mean_l += loss(mean_predictions, y) * size(mean_predictions)[end]
-        mean_acc += accuracy(mean_predictions, y)
-        mean_ece +=
-            ExpectedCalibrationError(mean_predictions |> cpu, onecold(y |> cpu)) *
-            args.batchsize
-        ntot += size(mean_predictions)[end]
-    end
-    # Normalize the loss
-    losses = [loss / ntot |> round4 for loss in l]
-    acc = [a / ntot * 100 |> round4 for a in acc]
-    ece_list = [x / ntot |> round4 for x in ece_list]
-    # Calculate mean loss
-    mean_l = mean_l / ntot |> round4
-    mean_acc = mean_acc / ntot * 100 |> round4
-    mean_ece = mean_ece / ntot |> round4
-
-    # Print the per ensemble mode loss and accuracy
-    for ensemble = 1:args.ensemble_size
-        @info (format(
-            "Model {} Loss: {} Accuracy: {} ECE: {}",
-            ensemble,
-            losses[ensemble],
-            acc[ensemble],
-            ece_list[ensemble],
-        ))
-    end
-    @info (format(
-        "Mean Loss: {} Mean Accuracy: {} Mean ECE: {}",
-        mean_l,
-        mean_acc,
-        mean_ece,
-    ))
-    @info "==========================================================="
-    return nothing
-end
-
-## utility functions
-num_params(model) = sum(length, Flux.params(model))
-round4(x) = round(x, digits = 4)
-
-# arguments for the `train` function
-Base.@kwdef mutable struct Args
-    η = 3e-4             # learning rate
-    λ = 0                # L2 regularizer param, implemented as weight decay
-    batchsize = 32       # batch size
-    epochs = 10          # number of epochs
-    seed = 0             # set seed > 0 for reproducibility
-    use_cuda = true      # if true use cuda (if available)
-    infotime = 1         # report every `infotime` epochs
-    checktime = 5        # Save the model every `checktime` epochs. Set to 0 for no checkpoints.
-    savepath = "runs/"   # results path
-    rank = 1
-    ensemble_size = 4
-end
-
-function train(; kws...)
-    args = Args(; kws...)
-    args.seed > 0 && Random.seed!(args.seed)
-    use_cuda = args.use_cuda && CUDA.functional()
-
-    if use_cuda
-        device = gpu
-        @info "Training on GPU"
-    else
-        device = cpu
-        @info "Training on CPU"
-    end
-
-    ## DATA
-    train_loader, test_loader = get_data(args)
-    @info "Dataset MNIST: $(train_loader.nobs) train and $(test_loader.nobs) test examples"
-
-    ## MODEL AND OPTIMIZER
-    model = LeNet5(args) |> device
-    @info "LeNet5 model: $(num_params(model)) trainable params"
-
-    ps = Flux.params(model)
-
-    opt = ADAM(args.η)
-    if args.λ > 0 # add weight decay, equivalent to L2 regularization
-        opt = Optimiser(WeightDecay(args.λ), opt)
-    end
-
-    function report(epoch)
-        # @info "Train Metrics"
-        # eval_loss_accuracy(args, train_loader, model, device)
-        @info "Test metrics"
-        eval_loss_accuracy(args, test_loader, model, device)
-    end
-
-    ## TRAINING
-    @info "Start Training"
-    report(0)
-    for epoch = 1:args.epochs
-        @showprogress for (x, y) in train_loader
-            # Make copies of batches for ensembles
-            x = repeat(x, 1, 1, 1, args.ensemble_size)
-            y = repeat(y, 1, args.ensemble_size)
-            x, y = x |> device, y |> device
-            gs = Flux.gradient(ps) do
-                ŷ = model(x)
-                loss(ŷ, y)
-            end
-
-            Flux.Optimise.update!(opt, ps, gs)
-        end
-
-        ## Printing and logging
-        epoch % args.infotime == 0 && report(epoch)
-    end
-end
-
-train()
diff --git a/src/DeepUncertainty.jl b/src/DeepUncertainty.jl
index 74de5589..5aacd657 100644
--- a/src/DeepUncertainty.jl
+++ b/src/DeepUncertainty.jl
@@ -1,17 +1,10 @@
 module DeepUncertainty
 
-using Flux
-using Random
-using Flux: @functor, glorot_normal, create_bias
-
 # Export layers
 export MCLayer, MCDense, MCConv
-export DenseBatchEnsemble, ConvBatchEnsemble
 export mean_loglikelihood, brier_score, ExpectedCalibrationError, prediction_metrics
 
 include("metrics.jl")
 include("layers/mclayers.jl")
-include("layers/BatchEnsemble/dense.jl")
-include("layers/BatchEnsemble/conv.jl")
 
 end
diff --git a/src/layers/BatchEnsemble/conv.jl b/src/layers/BatchEnsemble/conv.jl
deleted file mode 100644
index 564c943b..00000000
--- a/src/layers/BatchEnsemble/conv.jl
+++ /dev/null
@@ -1,145 +0,0 @@
-"""
-    ConvBatchEnsemble(filter, in => out, rank,
-                      ensemble_size, σ = identity;
-                      stride = 1, pad = 0, dilation = 1,
-                      groups = 1, [bias, weight, init])
-    ConvBatchEnsemble(layer, alpha, gamma, ensemble_bias, ensemble_act, rank)
-
-Creates a conv BatchEnsemble layer. Batch ensemble is a memory efficient alternative
-for deep ensembles. In deep ensembles, if the ensemble size is N, N different models
-are trained, making the time and memory complexity O(N * complexity of one network).
-BatchEnsemble generates weight matrices for each member in the ensemble using a
-couple of rank 1 vectors R (alpha), S (gamma), RS' and multiplying the result with
-weight matrix W element wise. We also call R and S as fast weights.
-
-Reference - https://arxiv.org/abs/2002.06715
-
-During both training and testing, we repeat the samples along the batch dimension
-N times, where N is the ensemble_size. For example, if each mini batch has 10 samples
-and our ensemble size is 4, then the actual input to the layer has 40 samples.
-The output of the layer has 40 samples as well, and each 10 samples can be considered
-as the output of an esnemble member.
-
-# Fields
-- `layer`: The dense layer which transforms the pertubed input to output
-- `alpha`: The first Fast weight of size (in_dim, ensemble_size)
-- `gamma`: The second Fast weight of size (out_dim, ensemble_size)
-- `ensemble_bias`: Bias added to the ensemble output, separate from dense layer bias
-- `ensemble_act`: The activation function to be applied on ensemble output
-- `rank`: Rank of the fast weights (rank > 1 doesn't work on GPU for now)
-
-# Arguments
-- `filter::NTuple{N,Integer}`: Kernel dimensions, eg, (5, 5)
-- `ch::Pair{<:Integer,<:Integer}`: Input channels => output channels
-- `rank::Integer`: Rank of the fast weights
-- `ensemble_size::Integer`: Number of models in the ensemble
-- `σ::F=identity`: Activation of the dense layer, defaults to identity
-- `init=glorot_normal`: Initialization function, defaults to glorot_normal
-- `alpha_init=glorot_normal`: Initialization function for the alpha fast weight,
-    defaults to glorot_normal
-- `gamma_init=glorot_normal`: Initialization function for the gamma fast weight,
-    defaults to glorot_normal
-- `bias::Bool=true`: Toggle the usage of bias in the dense layer
-- `ensemble_bias::Bool=true`: Toggle the usage of ensemble bias
-- `ensemble_act::F=identity`: Activation function for enseble outputs
-"""
-struct ConvBatchEnsemble{L,F,M,B}
-    layer::L
-    alpha::M
-    gamma::M
-    ensemble_bias::B
-    ensemble_act::F
-    rank::Any
-    function ConvBatchEnsemble(
-        layer::L,
-        alpha::M,
-        gamma::M,
-        ensemble_bias = true,
-        ensemble_act::F = identity,
-        rank = 1,
-    ) where {M,F,L}
-        ensemble_bias = create_bias(gamma, ensemble_bias, size(gamma)[1], size(gamma)[2])
-        new{typeof(layer),F,M,typeof(ensemble_bias)}(
-            layer,
-            alpha,
-            gamma,
-            ensemble_bias,
-            ensemble_act,
-            rank,
-        )
-    end
-end
-
-function ConvBatchEnsemble(
-    k::NTuple{N,Integer},
-    ch::Pair{<:Integer,<:Integer},
-    rank::Integer,
-    ensemble_size::Integer,
-    σ = identity;
-    init = glorot_normal,
-    alpha_init = glorot_normal,
-    gamma_init = glorot_normal,
-    stride = 1,
-    pad = 0,
-    dilation = 1,
-    groups = 1,
-    bias = true,
-    ensemble_bias = true,
-    ensemble_act = identity,
-) where {N}
-    layer = Flux.Conv(
-        k,
-        ch,
-        σ;
-        stride = stride,
-        pad = pad,
-        dilation = dilation,
-        init = init,
-        groups = groups,
-        bias = bias,
-    )
-    in_dim = ch[1]
-    out_dim = ch[2]
-    if rank >= 1
-        alpha_shape = (in_dim, ensemble_size)
-        gamma_shape = (out_dim, ensemble_size)
-    else
-        error("Rank must be >= 1.")
-    end
-    alpha = alpha_init(alpha_shape)
-    gamma = gamma_init(gamma_shape)
-
-    return ConvBatchEnsemble(layer, alpha, gamma, ensemble_bias, ensemble_act, rank)
-end
-
-@functor ConvBatchEnsemble
-
-function (be::ConvBatchEnsemble)(x)
-    # Conv Batch Ensemble params
-    layer = be.layer
-    alpha = be.alpha
-    gamma = be.gamma
-    e_b = be.ensemble_bias
-    e_σ = be.ensemble_act
-
-    batch_size = size(x)[end]
-    in_size = size(alpha)[1]
-    out_size = size(gamma)[1]
-    ensemble_size = size(alpha)[2]
-    samples_per_model = batch_size ÷ ensemble_size
-
-    # Alpha, gamma shapes - [units, ensembles, rank]
-    e_b = repeat(e_b, samples_per_model)
-    alpha = repeat(alpha, samples_per_model)
-    gamma = repeat(gamma, samples_per_model)
-    # Reshape alpha, gamma to [units, batch_size, rank]
-    e_b = reshape(e_b, (1, 1, out_size, batch_size))
-    alpha = reshape(alpha, (1, 1, in_size, batch_size))
-    gamma = reshape(gamma, (1, 1, out_size, batch_size))
-
-    perturbed_x = x .* alpha
-    output = layer(perturbed_x) .* gamma
-    output = e_σ.(output .+ e_b)
-
-    return output
-end
diff --git a/src/layers/BatchEnsemble/dense.jl b/src/layers/BatchEnsemble/dense.jl
deleted file mode 100644
index 9ef92921..00000000
--- a/src/layers/BatchEnsemble/dense.jl
+++ /dev/null
@@ -1,151 +0,0 @@
-"""
-DenseBatchEnsemble(in, out, rank,
-                   ensemble_size,
-                   σ=identity;
-                   bias=true,
-                   init=glorot_normal,
-                   alpha_init=glorot_normal,
-                   gamma_init=glorot_normal)
-DenseBatchEnsemble(layer, alpha, gamma, ensemble_bias, ensemble_act, rank)
-
-Creates a dense BatchEnsemble layer. Batch ensemble is a memory efficient alternative
-for deep ensembles. In deep ensembles, if the ensemble size is N, N different models
-are trained, making the time and memory complexity O(N * complexity of one network).
-BatchEnsemble generates weight matrices for each member in the ensemble using a
-couple of rank 1 vectors R (alpha), S (gamma), RS' and multiplying the result with
-weight matrix W element wise. We also call R and S as fast weights.
-
-Reference - https://arxiv.org/abs/2002.06715
-
-During both training and testing, we repeat the samples along the batch dimension
-N times, where N is the ensemble_size. For example, if each mini batch has 10 samples
-and our ensemble size is 4, then the actual input to the layer has 40 samples.
-The output of the layer has 40 samples as well, and each 10 samples can be considered
-as the output of an esnemble member.
-
-# Fields
-- `layer`: The dense layer which transforms the pertubed input to output
-- `alpha`: The first Fast weight of size (in_dim, ensemble_size)
-- `gamma`: The second Fast weight of size (out_dim, ensemble_size)
-- `ensemble_bias`: Bias added to the ensemble output, separate from dense layer bias
-- `ensemble_act`: The activation function to be applied on ensemble output
-- `rank`: Rank of the fast weights (rank > 1 doesn't work on GPU for now)
-
-# Arguments
-- `in::Integer`: Input dimension of features
-- `out::Integer`: Output dimension of features
-- `rank::Integer`: Rank of the fast weights
-- `ensemble_size::Integer`: Number of models in the ensemble
-- `σ::F=identity`: Activation of the dense layer, defaults to identity
-- `init=glorot_normal`: Initialization function, defaults to glorot_normal
-- `alpha_init=glorot_normal`: Initialization function for the alpha fast weight,
-    defaults to glorot_normal
-- `gamma_init=glorot_normal`: Initialization function for the gamma fast weight,
-    defaults to glorot_normal
-- `bias::Bool=true`: Toggle the usage of bias in the dense layer
-- `ensemble_bias::Bool=true`: Toggle the usage of ensemble bias
-- `ensemble_act::F=identity`: Activation function for enseble outputs
-"""
-struct DenseBatchEnsemble{L,F,M,B}
-    layer::L
-    alpha::M
-    gamma::M
-    ensemble_bias::B
-    ensemble_act::F
-    rank::Any
-    function DenseBatchEnsemble(
-        layer::L,
-        alpha::M,
-        gamma::M,
-        ensemble_bias = true,
-        ensemble_act::F = identity,
-        rank = 1,
-    ) where {M,F,L}
-        ensemble_bias = create_bias(gamma, ensemble_bias, size(gamma)[1], size(gamma)[2])
-        new{typeof(layer),F,M,typeof(ensemble_bias)}(
-            layer,
-            alpha,
-            gamma,
-            ensemble_bias,
-            ensemble_act,
-            rank,
-        )
-    end
-end
-
-function DenseBatchEnsemble(
-    in::Integer,
-    out::Integer,
-    rank::Integer,
-    ensemble_size::Integer,
-    σ = identity;
-    init = glorot_normal,
-    alpha_init = glorot_normal,
-    gamma_init = glorot_normal,
-    bias = true,
-    ensemble_bias = true,
-    ensemble_act = identity,
-)
-
-    layer = Flux.Dense(in, out, σ; init = init, bias = bias)
-    if rank >= 1
-        alpha_shape = (in, ensemble_size, rank)
-        gamma_shape = (out, ensemble_size, rank)
-    else
-        error("Rank must be >= 1.")
-    end
-    alpha = alpha_init(alpha_shape)
-    gamma = gamma_init(gamma_shape)
-
-    return DenseBatchEnsemble(layer, alpha, gamma, ensemble_bias, ensemble_act, rank)
-end
-
-@functor DenseBatchEnsemble
-
-"""
-The forward pass for a DenseBatchEnsemble layer. The input is initially perturbed
-using the first fast weight, then passed through the dense layer, and finall
-multiplied by the second fast weight.
-
-# Arguments
-- `x::AbstractVecOrMat`: Input tensors
-"""
-function (be::DenseBatchEnsemble)(x)
-    layer = be.layer
-    alpha = be.alpha
-    gamma = be.gamma
-    e_b = be.ensemble_bias
-    e_σ = be.ensemble_act
-    rank = be.rank
-
-    batch_size = size(x)[end]
-    in_size = size(alpha)[1]
-    out_size = size(gamma)[1]
-    ensemble_size = size(alpha)[2]
-    samples_per_model = batch_size ÷ ensemble_size
-
-    # Alpha, gamma shapes - [units, ensembles, rank]
-    alpha = reshape(alpha, (in_size, ensemble_size * rank))
-    gamma = reshape(gamma, (out_size, ensemble_size * rank))
-    # Repeat breaks on GPU when input dims > 2
-    alpha = repeat(alpha, samples_per_model)
-    gamma = repeat(gamma, samples_per_model)
-    # Reshape alpha, gamma to [units, batch_size, rank]
-    alpha = reshape(alpha, (in_size, batch_size, rank))
-    gamma = reshape(gamma, (out_size, batch_size, rank))
-    # Reshape inputs to [units, batch_size, 1] for broadcasting
-    x = Flux.unsqueeze(x, (ndims(x) + 1))
-    # Perturb the inputs
-    perturbed_x = x .* alpha
-    # Dense layer forward pass
-    outputs = layer(perturbed_x) .* gamma
-    # Reduce the rank dimension through summing it up
-    outputs = sum(outputs, dims = 3)
-    outputs = reshape(outputs, (out_size, samples_per_model, ensemble_size))
-    # Reshape ensemble bias
-    e_b = Flux.unsqueeze(e_b, ndims(e_b))
-
-    outputs = e_σ.(outputs .+ e_b)
-    outputs = reshape(outputs, (out_size, batch_size))
-    return outputs
-end
diff --git a/test/cuda/layers/batchensemble_gpu.jl b/test/cuda/layers/batchensemble_gpu.jl
deleted file mode 100644
index 2d6ed695..00000000
--- a/test/cuda/layers/batchensemble_gpu.jl
+++ /dev/null
@@ -1,77 +0,0 @@
-@testset "Dense batchensemble" begin
-    ensemble_size = 4
-    samples_per_model = 4
-    input_dim = 5
-    output_dim = 5
-    rank = 1
-    inputs = rand(Float32, input_dim, samples_per_model)
-    layer = DenseBatchEnsemble(
-        input_dim,
-        output_dim,
-        rank,
-        ensemble_size;
-        alpha_init = ones,
-        gamma_init = ones,
-    )
-    layer = layer |> gpu
-    batch_inputs = gpu(repeat(inputs, 1, ensemble_size))
-    batch_outputs = layer(batch_inputs)
-    # Do the computation in for loop to compare outputs
-    layer = layer |> cpu
-    loop_outputs = []
-    for i = 1:ensemble_size
-        perturbed_inputs = inputs .* layer.alpha[i]
-        outputs = layer.layer(perturbed_inputs) .* layer.gamma[i]
-        outputs = layer.ensemble_act.(outputs .+ layer.ensemble_bias[i])
-        push!(loop_outputs, outputs)
-    end
-    loop_outputs = Flux.batch(loop_outputs)
-    loop_outputs = reshape(loop_outputs, (output_dim, samples_per_model * ensemble_size))
-    @test batch_outputs isa CuArray
-    @test size(batch_outputs) == size(loop_outputs)
-    @test isapprox(cpu(batch_outputs), loop_outputs, atol = 0.05)
-end
-
-@testset "ConvBatchEnsemble" begin
-    ensemble_size = 4
-    samples_per_model = 4
-    input_dim = 5
-    output_dim = 10
-    rank = 1
-    inputs = rand(Float32, 10, 10, input_dim, samples_per_model)
-    beconv = ConvBatchEnsemble(
-        (5, 5),
-        5 => 10,
-        rank,
-        ensemble_size,
-        relu;
-        alpha_init = ones,
-        gamma_init = ones,
-    )
-    beconv = beconv |> gpu
-    batch_inputs = gpu(repeat(inputs, 1, 1, 1, ensemble_size))
-    batch_outputs = beconv(batch_inputs)
-    # Do the computation in for loop to compare outputs
-    beconv = beconv |> cpu
-    loop_outputs = []
-    for i = 1:ensemble_size
-        perturbed_inputs = inputs .* beconv.alpha[i]
-        outputs = beconv.layer(perturbed_inputs) .* beconv.gamma[i]
-        outputs = beconv.ensemble_act.(outputs .+ beconv.ensemble_bias[i])
-        push!(loop_outputs, outputs)
-    end
-    loop_outputs = Flux.batch(loop_outputs)
-    loop_outputs_size = size(batch_outputs)
-    loop_outputs = reshape(
-        loop_outputs,
-        (
-            loop_outputs_size[1],
-            loop_outputs_size[2],
-            output_dim,
-            samples_per_model * ensemble_size,
-        ),
-    )
-    @test batch_outputs isa CuArray
-    @test size(batch_outputs) == size(loop_outputs)
-    @test isapprox(cpu(batch_outputs), loop_outputs, atol = 0.05)
-end
diff --git a/test/cuda/layers/mclayers_gpu.jl b/test/cuda/layers/mclayers_gpu.jl
deleted file mode 100644
index f176cfbf..00000000
--- a/test/cuda/layers/mclayers_gpu.jl
+++ /dev/null
@@ -1,37 +0,0 @@
-function test_sparsity(x, target_sparsity; atol = 0.05)
-    number_of_zeros = count(ele -> (ele == 0.0), x)
-    sparsity = number_of_zeros / sum(length, x)
-    @test isapprox(target_sparsity, sparsity; atol)
-end
-
-@testset "MC Dense GPU" begin
-    dropout_rate = 0.35
-    # Test MC Dense layer
-    a = gpu(rand(Float32, 8, 32))
-    layer = gpu(MCDense(8, 16, dropout_rate))
-    output = layer(a)
-    # Test if it's CuArray
-    @test output isa CuArray
-    @test isequal(size(output), (16, 32))
-    test_sparsity(output, dropout_rate)
-    # Test MC dense dropout toggle
-    output = layer(a, dropout = false)
-    test_sparsity(output, 0)
-end
-
-@testset "MC Conv GPU" begin
-    dropout_rate = 0.4
-    # Test MC conv layer
-    a = gpu(rand(Float32, 32, 32, 3, 32))
-    layer = MCConv((5, 5), 3 => 6, dropout_rate) |> gpu
-    output = layer(a)
-    # Test if it's CuArray
-    @test output isa CuArray
-    # Test the output shape
-    @test isequal(size(output), (28, 28, 6, 32))
-    # Test the sparsity percentage in the array
-    test_sparsity(output, dropout_rate)
-    # Test MC conv dropout toggle
-    output = layer(a, dropout = false)
-    test_sparsity(output, 0)
-end
diff --git a/test/cuda/runtests.jl b/test/cuda/runtests.jl
deleted file mode 100644
index 8e9a4141..00000000
--- a/test/cuda/runtests.jl
+++ /dev/null
@@ -1,7 +0,0 @@
-using Flux, Test, CUDA
-
-@info "Testing GPU Support"
-CUDA.allowscalar(false)
-
-include("layers/mclayers_gpu.jl")
-include("layers/batchensemble_gpu.jl")
diff --git a/test/layers/batchensemble.jl b/test/layers/batchensemble.jl
deleted file mode 100644
index 4107178e..00000000
--- a/test/layers/batchensemble.jl
+++ /dev/null
@@ -1,72 +0,0 @@
-@testset "Dense batchensemble" begin
-    ensemble_size = 4
-    samples_per_model = 4
-    input_dim = 5
-    output_dim = 5
-    rank = 1
-    inputs = rand(Float32, input_dim, samples_per_model)
-    layer = DenseBatchEnsemble(
-        input_dim,
-        output_dim,
-        rank,
-        ensemble_size;
-        alpha_init = ones,
-        gamma_init = ones,
-    )
-    batch_inputs = repeat(inputs, 1, ensemble_size)
-    batch_outputs = layer(batch_inputs)
-    # Do the computation in for loop to compare outputs
-    loop_outputs = []
-    for i = 1:ensemble_size
-        perturbed_inputs = inputs .* layer.alpha[i]
-        outputs = layer.layer(perturbed_inputs) .* layer.gamma[i]
-        outputs = layer.ensemble_act.(outputs .+ layer.ensemble_bias[i])
-        push!(loop_outputs, outputs)
-    end
-    loop_outputs = Flux.batch(loop_outputs)
-    loop_outputs = reshape(loop_outputs, (output_dim, samples_per_model * ensemble_size))
-    @test size(batch_outputs) == size(loop_outputs)
-    @test isapprox(batch_outputs, loop_outputs, atol = 0.05)
-end
-
-@testset "ConvBatchEnsemble" begin
-    ensemble_size = 4
-    samples_per_model = 4
-    input_dim = 5
-    output_dim = 10
-    rank = 1
-    inputs = rand(Float32, 10, 10, input_dim, samples_per_model)
-    beconv = ConvBatchEnsemble(
-        (5, 5),
-        5 => 10,
-        rank,
-        ensemble_size,
-        relu;
-        alpha_init = ones,
-        gamma_init = ones,
-    )
-    batch_inputs = repeat(inputs, 1, 1, 1, ensemble_size)
-    batch_outputs = beconv(batch_inputs)
-
-    # Do the computation in for loop to compare outputs
-    loop_outputs = []
-    for i = 1:ensemble_size
-        perturbed_inputs = inputs .* beconv.alpha[i]
-        outputs = beconv.layer(perturbed_inputs) .* beconv.gamma[i]
-        outputs = beconv.ensemble_act.(outputs .+ beconv.ensemble_bias[i])
-        push!(loop_outputs, outputs)
-    end
-    loop_outputs = Flux.batch(loop_outputs)
-    loop_outputs_size = size(batch_outputs)
-    loop_outputs = reshape(
-        loop_outputs,
-        (
-            loop_outputs_size[1],
-            loop_outputs_size[2],
-            output_dim,
-            samples_per_model * ensemble_size,
-        ),
-    )
-    @test size(batch_outputs) == size(loop_outputs)
-    @test isapprox(batch_outputs, loop_outputs, atol = 0.05)
-end
diff --git a/test/layers/mclayers.jl b/test/layers/mclayers_test.jl
similarity index 50%
rename from test/layers/mclayers.jl
rename to test/layers/mclayers_test.jl
index 1e8c2627..9d963b4a 100644
--- a/test/layers/mclayers.jl
+++ b/test/layers/mclayers_test.jl
@@ -1,8 +1,5 @@
-function test_sparsity(x, target_sparsity; atol = 0.05)
-    number_of_zeros = count(ele -> (ele == 0.0), x)
-    sparsity = number_of_zeros / sum(length, x)
-    @test isapprox(target_sparsity, sparsity; atol)
-end
+using Test
+using DeepUncertainty: MCDense, MCConv
 
 @testset "MC Dense" begin
     dropout_rate = 0.35
@@ -10,11 +7,16 @@ end
     a = rand(Float32, 8, 32)
     layer = MCDense(8, 16, dropout_rate)
     output = layer(a)
+    number_of_zeros = count(x -> (x == 0.0), output)
+    sparsity = number_of_zeros / sum(length, output)
     @test isequal(size(output), (16, 32))
-    test_sparsity(output, dropout_rate)
+    @test isapprox(dropout_rate, sparsity; atol = 0.05)
+
     # Test MC dense dropout toggle
     output = layer(a, dropout = false)
-    test_sparsity(output, 0)
+    number_of_zeros = count(x -> (x == 0.0), output)
+    sparsity = number_of_zeros / sum(length, output)
+    @test isapprox(0, sparsity; atol = 0.05)
 end
 
 @testset "MC Conv" begin
@@ -23,11 +25,16 @@ end
     a = rand(Float32, 32, 32, 3, 32)
     layer = MCConv((5, 5), 3 => 6, dropout_rate)
     output = layer(a)
+    number_of_zeros = count(x -> (x == 0.0), output)
+    sparsity = number_of_zeros / sum(length, output)
     # Test the output shape
     @test isequal(size(output), (28, 28, 6, 32))
    # Test the sparsity percentage in the array
-    test_sparsity(output, dropout_rate)
+    @test isapprox(dropout_rate, sparsity; atol = 0.05)
+
     # Test MC conv dropout toggle
     output = layer(a, dropout = false)
-    test_sparsity(output, 0)
+    number_of_zeros = count(x -> (x == 0.0), output)
+    sparsity = number_of_zeros / sum(length, output)
+    @test isapprox(0, sparsity; atol = 0.05)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 476d7605..99d1e32a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,20 +1,7 @@
 using DeepUncertainty
 using Test
-using Flux
-using Flux.CUDA
-using Flux: cpu, gpu
 
 @testset "Layers" begin
-    # MC layers
-    include("./layers/mclayers.jl")
-    # Batch ensembe layers
-    include("./layers/batchensemble.jl")
-end
-
-@testset "CUDA" begin
-    if CUDA.functional()
-        include("cuda/runtests.jl")
-    else
-        @warn "CUDA unavailable, not testing GPU support"
-    end
+    # Test the layers
+    include("./layers/mclayers_test.jl")
 end
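
Note (not part of the diff above): after this change only the Monte Carlo dropout layers remain exported, so downstream usage looks roughly like the sketch below. It is a minimal, hypothetical example: the MCDense constructor and the `dropout` keyword follow the retained tests, while the Chain/softmax wrapping and the 10-pass averaging are illustrative assumptions, not an API guarantee.

    using Flux, Statistics
    using DeepUncertainty: MCDense

    # Two stacked MC dropout layers; MCDense(in, out, rate) as in the tests above.
    model = Chain(MCDense(8, 16, 0.35), MCDense(16, 4, 0.35))
    x = rand(Float32, 8, 32)   # 32 samples with 8 features each

    # Monte Carlo inference: dropout stays active at test time, so repeated
    # forward passes give different predictions that can be averaged.
    samples = [softmax(model(x), dims = 1) for _ = 1:10]
    mean_prediction = mean(samples)   # elementwise mean over the 10 passes

    # Deterministic pass with dropout disabled (the toggle shown in the tests).
    deterministic = model[1](x, dropout = false)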