Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
e80389c
add converged method
Maximilian-Stefan-Ernst Mar 9, 2026
1623119
refactor bootstrap and add bootstrap for any statistic
Maximilian-Stefan-Ernst Mar 9, 2026
8f2eaf1
add CI and p values
Maximilian-Stefan-Ernst Mar 9, 2026
6f1246c
add exports
Maximilian-Stefan-Ernst Mar 9, 2026
20c8b63
add boostrap, CI and p-values tests
Maximilian-Stefan-Ernst Mar 9, 2026
0eb04ef
Apply suggestions from formatter
Maximilian-Stefan-Ernst Mar 9, 2026
5e5575b
fix bootstrap tests
Maximilian-Stefan-Ernst Mar 9, 2026
c27706d
fix bootstrap tests
Maximilian-Stefan-Ernst Mar 9, 2026
24a7947
fix nobs var check in update_observed, keep previous args in update_o…
Maximilian-Stefan-Ernst Mar 12, 2026
aa62557
increase tolerance for bootstrap test
Maximilian-Stefan-Ernst Mar 12, 2026
fdca879
increase tolerance for bootstrap test
Maximilian-Stefan-Ernst Mar 12, 2026
ccf8c0d
remove bootstrap try-catch and update tests
Maximilian-Stefan-Ernst Mar 12, 2026
1adb1fa
fix bootstrap
Maximilian-Stefan-Ernst Mar 12, 2026
9478d55
fix bootstrap
Maximilian-Stefan-Ernst Mar 12, 2026
2e37710
fix bootstrap
Maximilian-Stefan-Ernst Mar 12, 2026
58cdb0f
fix mg weights
Maximilian-Stefan-Ernst Mar 13, 2026
7f072f4
fix bootstrap tests
Maximilian-Stefan-Ernst Mar 20, 2026
cacb73c
fix bootstrap tests
Maximilian-Stefan-Ernst Mar 20, 2026
a1c76af
add WLS option to fix weight matrix for updaeting observed data
Maximilian-Stefan-Ernst Mar 20, 2026
537cf16
narrower output types and remove locks for bootstrap + code formatting
Maximilian-Stefan-Ernst Mar 20, 2026
d00a65e
Apply suggestions from code review
Maximilian-Stefan-Ernst Mar 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ext/SEMNLOptExt/NLopt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ end
SEM.algorithm_name(res::NLoptResult) = res.problem.algorithm
SEM.n_iterations(res::NLoptResult) = res.problem.numevals
SEM.convergence(res::NLoptResult) = res.result[3]
function SEM.converged(res::NLoptResult)
flag = res.result[3]
return flag ∈ [:SUCCESS, :STOPVAL_REACHED, :FTOL_REACHED, :XTOL_REACHED]
end

# construct NLopt.jl problem
function NLopt_problem(algorithm, options, npar)
Expand Down
2 changes: 1 addition & 1 deletion ext/SEMProximalOptExt/ProximalAlgorithms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ SEM.algorithm_name(res::ProximalResult) = SEM.algorithm_name(res.optimizer.algor
SEM.algorithm_name(
::ProximalAlgorithms.IterativeAlgorithm{I, H, S, D, K},
) where {I, H, S, D, K} = nameof(I)

SEM.convergence(
::ProximalResult,
) = "No standard convergence criteria for proximal \n algorithms available."
SEM.converged(::ProximalResult) = missing
SEM.n_iterations(res::ProximalResult) = res.n_iterations

############################################################################################
Expand Down
8 changes: 8 additions & 0 deletions src/StructuralEquationModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ include("frontend/fit/fitmeasures/fit_measures.jl")
# standard errors
include("frontend/fit/standard_errors/hessian.jl")
include("frontend/fit/standard_errors/bootstrap.jl")
include("frontend/fit/standard_errors/z_test.jl")
include("frontend/fit/standard_errors/confidence_intervals.jl")

export AbstractSem,
AbstractSemSingle,
Expand Down Expand Up @@ -129,6 +131,7 @@ export AbstractSem,
optimizer_engine_doc,
optimizer_engines,
n_iterations,
converged,
convergence,
SemObserved,
SemObservedData,
Expand Down Expand Up @@ -191,7 +194,12 @@ export AbstractSem,
CFI,
EmMVNModel,
se_hessian,
bootstrap,
se_bootstrap,
normal_CI,
normal_CI!,
z_test,
z_test!,
example_data,
replace_observed,
update_observed,
Expand Down
4 changes: 3 additions & 1 deletion src/frontend/fit/SemFit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ Fitted structural equation model.

- `algorithm_name(::SemFit)` -> optimization algorithm
- `n_iterations(::SemFit)` -> number of iterations
- `convergence(::SemFit)` -> convergence properties
- `convergence(::SemFit)` -> convergence flags
- `converged(::SemFit)` -> convergence success
"""
mutable struct SemFit{Mi, So, St, Mo, O}
minimum::Mi
Expand Down Expand Up @@ -71,3 +72,4 @@ optimizer_engine(sem_fit::SemFit) = optimizer_engine(optimization_result(sem_fit
algorithm_name(sem_fit::SemFit) = algorithm_name(optimization_result(sem_fit))
n_iterations(sem_fit::SemFit) = n_iterations(optimization_result(sem_fit))
convergence(sem_fit::SemFit) = convergence(optimization_result(sem_fit))
converged(sem_fit::SemFit) = converged(optimization_result(sem_fit))
196 changes: 161 additions & 35 deletions src/frontend/fit/standard_errors/bootstrap.jl
Original file line number Diff line number Diff line change
@@ -1,39 +1,164 @@
"""
bootstrap(
fitted::SemFit,
specification::SemSpecification;
statistic = solution,
n_boot = 3000,
data = nothing,
engine = :Optim,
parallel = false,
fit_kwargs = Dict(),
replace_kwargs = Dict())

Return bootstrap samples for `statistic`.

# Arguments
- `fitted`: a fitted SEM.
- `specification`: a `ParameterTable` or `RAMMatrices` object passed to `replace_observed`.
- `statistic`: any function that can be called on a `SemFit` object.
The output will be returned as the bootstrap sample.
- `n_boot`: number of boostrap samples
- `data`: data to sample from. Only needed if different than the data from `sem_fit`
- `engine`: optimizer engine, passed to `fit`.
- `parallel`: if `true`, run bootstrap samples in parallel on all available threads.
The number of threads is controlled by the `JULIA_NUM_THREADS` environment variable or
the `--threads` flag when starting Julia.
- `fit_kwargs` : a `Dict` controlling model fitting for each bootstrap sample,
passed to `fit`
- `replace_kwargs`: a `Dict` passed to `replace_observed`

# Example
```julia
# 1000 boostrap samples of the minimum, fitted with :Optim
bootstrap(
fitted;
statistic = StructuralEquationModels.minimum,
n_boot = 1000,
engine = :Optim,
)
```
"""
function bootstrap(
fitted::SemFit,
specification::SemSpecification;
statistic = solution,
n_boot = 3000,
data = nothing,
engine = :Optim,
parallel = false,
fit_kwargs = Dict(),
replace_kwargs = Dict(),
)
# access data and convert to matrix
data = prepare_data_bootstrap(data, fitted.model)
start = solution(fitted)
# pre-allocations
out = Vector{Any}(nothing, n_boot)
conv = fill(false, n_boot)
# fit to bootstrap samples
if !parallel
for i in 1:n_boot
sample_data = bootstrap_sample(data)
new_model = replace_observed(
fitted.model;
data = sample_data,
specification = specification,
replace_kwargs...,
)
new_fit = fit(new_model; start_val = start, engine = engine, fit_kwargs...)
sample = statistic(new_fit)
c = converged(new_fit)
out[i] = sample
conv[i] = c
end
else
n_threads = Threads.nthreads()
# Pre-create one independent model copy per thread via deepcopy.
model_pool = Channel(n_threads)
for _ in 1:n_threads
put!(model_pool, deepcopy(fitted.model))
end
# fit models in parallel
lk = ReentrantLock()
Threads.@threads for i in 1:n_boot
thread_model = take!(model_pool)
sample_data = bootstrap_sample(data)
new_model = replace_observed(
thread_model;
data = sample_data,
specification = specification,
replace_kwargs...,
)
new_fit = fit(new_model; start_val = start, engine = engine, fit_kwargs...)
sample = statistic(new_fit)
c = converged(new_fit)
out[i] = sample
conv[i] = c
put!(model_pool, thread_model)
end
end
return Dict(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to use NamedTuple here.
Also, while it might be fragile to use Base.return_types() to infer the result of statistic() before it is called, here it should be in principle possible to use T = mapreduce(typeof, promote_type, out) and Vector{T}(out) to help with the type inference and faster code generation downstream.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree in principle that the NamedTuple is better, but I will keep the Dict for now because it has a much nicer console output that is actually humanly readable. Nice idea to fix the type, curiously collect(a for a in out) seems to be faster by quite a margin.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since bootstrap() will be the generic method to calculate different statistics, it would be nice to make its result play nicely with the type inference in its callers. NamedTuple is just an ad hoc way to achieve it. If pretty printing is a concern, it could be its own type SemBootstrapResult{T}, and SEM.jl may rely on e.g. PrettyPrinting.jl to quickly implement Base.show(io, bootstrap::SemBootstrapResult) = pprint(bootstrap)

:samples => collect(a for a in out),
:n_boot => n_boot,
:n_converged => sum(conv),
:converged => conv,
)
end

"""
se_bootstrap(
sem_fit::SemFit;
fitted::SemFit,
specification::SemSpecification;
n_boot = 3000,
data = nothing,
specification = nothing,
parallel = false,
kwargs...)
fit_kwargs = Dict(),
replace_kwargs = Dict())

Return bootstrap standard errors.

# Arguments
- `fitted`: a fitted SEM.
- `specification`: a `ParameterTable` or `RAMMatrices` object passed to `replace_observed`.
- `n_boot`: number of boostrap samples
- `data`: data to sample from. Only needed if different than the data from `sem_fit`
- `specification`: a `ParameterTable` or `RAMMatrices` object passed down to `replace_observed`.
Necessary for FIML models.
- `engine`: optimizer engine, passed to `fit`.
- `parallel`: if `true`, run bootstrap samples in parallel on all available threads.
The number of threads is controlled by the `JULIA_NUM_THREADS` environment variable or
the `--threads` flag when starting Julia.
- `kwargs...`: passed down to `replace_observed`
- `fit_kwargs` : a `Dict` controlling model fitting for each bootstrap sample,
passed to `sem_fit`
- `replace_kwargs`: a `Dict` passed to `replace_observed`

# Example
```julia
# 1000 boostrap samples, fitted with :NLopt
using NLopt

se_bootstrap(
fitted;
n_boot = 1000,
engine = :NLopt,
)
```
"""
function se_bootstrap(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be possible to make se_bootstrap() a wrapper around bootstrap()

fitted::SemFit{Mi, So, St, Mo, O};
fitted::SemFit,
specification::SemSpecification;
n_boot = 3000,
data = nothing,
specification = nothing,
engine = :Optim,
parallel = false,
kwargs...,
) where {Mi, So, St, Mo, O}
fit_kwargs = Dict(),
replace_kwargs = Dict(),
)
# access data and convert to matrix
data = prepare_data_bootstrap(data, fitted.model)
start = solution(fitted)
# pre-allocations
total_sum = zero(start)
total_squared_sum = zero(start)
n_failed = Ref(0)
n_conv = Ref(0)
# fit to bootstrap samples
if !parallel
for _ in 1:n_boot
Expand All @@ -42,14 +167,15 @@ function se_bootstrap(
fitted.model;
data = sample_data,
specification = specification,
kwargs...,
replace_kwargs...,
)
try
sol = solution(fit(new_model; start_val = start))
new_fit = fit(new_model; start_val = start, engine = engine, fit_kwargs...)
sol = solution(new_fit)
conv = converged(new_fit)
if conv
n_conv[] += 1
@. total_sum += sol
@. total_squared_sum += sol^2
catch
n_failed[] += 1
end
end
else
Expand All @@ -63,37 +189,37 @@ function se_bootstrap(
lk = ReentrantLock()
Threads.@threads for _ in 1:n_boot
thread_model = take!(model_pool)
try
sample_data = bootstrap_sample(data)
new_model = replace_observed(
thread_model;
data = sample_data,
specification = specification,
kwargs...,
)
sol = solution(fit(new_model; start_val = start))
sample_data = bootstrap_sample(data)
new_model = replace_observed(
thread_model;
data = sample_data,
specification = specification,
replace_kwargs...,
)
new_fit = fit(new_model; start_val = start, engine = engine, fit_kwargs...)
sol = solution(new_fit)
conv = converged(new_fit)
if conv
lock(lk) do
n_conv[] += 1
@. total_sum += sol
@. total_squared_sum += sol^2
end
catch
lock(lk) do
n_failed[] += 1
end
finally
put!(model_pool, thread_model)
end
put!(model_pool, thread_model)
end
end
# compute parameters
n_conv = n_boot - n_failed[]
n_conv = n_conv[]
sd = sqrt.(total_squared_sum / n_conv - (total_sum / n_conv) .^ 2)
if !iszero(n_failed[])
@warn "During bootstrap sampling, "*string(n_failed[])*" models did not converge"
end
@info string(n_conv)*" models converged"
return sd
end

############################################################################################
### Helper Functions
############################################################################################

function bootstrap_sample(data::Matrix)
nobs = size(data, 1)
index_new = rand(1:nobs, nobs)
Expand Down
43 changes: 43 additions & 0 deletions src/frontend/fit/standard_errors/confidence_intervals.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
_doc_normal_CI = """
(1) normal_CI(fitted, se; α = 0.05, name_lower = :ci_lower, name_upper = :ci_upper)

(2) normal_CI!(partable, fitted, se; α = 0.05, name_lower = :ci_lower, name_upper = :ci_upper)

Return normal-theory confidence intervals for all model parameters.
`normal_CI!` additionally writes the result into `partable`.

# Arguments
- `fitted`: a fitted SEM.
- `se`: standard errors for each parameter, e.g. from [`se_hessian`](@ref) or
[`se_bootstrap`](@ref).
- `partable`: a [`ParameterTable`](@ref) to write confidence intervals to.
- `α`: significance level. Defaults to `0.05` (95% intervals).
- `name_lower`: column name for the lower bound in `partable`. Defaults to `:ci_lower`.
- `name_upper`: column name for the upper bound in `partable`. Defaults to `:ci_upper`.

# Returns
- a `Dict` with keys `name_lower` and `name_upper`, each mapping to a vector of bounds
over all parameters.
"""

@doc "$(_doc_normal_CI)"
function normal_CI(fitted, se; α = 0.05, name_lower = :ci_lower, name_upper = :ci_upper)
qnt = quantile(Normal(0, 1), 1-α/2);
sol = solution(fitted)
return Dict(name_lower => sol - qnt*se, name_upper => sol + qnt*se)
end

@doc "$(_doc_normal_CI)"
function normal_CI!(
partable,
fitted,
se;
α = 0.05,
name_lower = :ci_lower,
name_upper = :ci_upper,
)
cis = normal_CI(fitted, se; α, name_lower, name_upper)
update_partable!(partable, name_lower, fitted, cis[name_lower])
update_partable!(partable, name_upper, fitted, cis[name_upper])
return cis
end
Loading
Loading