From e88d4c1d7d703529a3bfa627b05d9704f60e99f6 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Fri, 24 Apr 2026 16:35:15 +0300 Subject: [PATCH 01/10] Fix Windows MCR install: handle exit code 3010 and reboot flow - installer.go: remove dead-code branch that blocked installer download path; GetInstaller() now always attempts the download when no cached path exists. - common.go: hoist rebootable interface to package scope so it is accessible from both windows.go and any future configurers. - windows.go (InstallMCR): * Detect exit code 3010 (ERROR_SUCCESS_REBOOT_REQUIRED) via isExitCode3010() helper and treat it as a reboot-required signal instead of a hard failure. * Preserve fallback: if the installer exits 0 but prints 'Your machine needs to be rebooted', still trigger a reboot. * Fix reboot success fall-through: after rh.Reboot() succeeds, return nil instead of falling through to the 'host isn't rebootable' error return. --- pkg/configurer/common.go | 4 ++++ pkg/configurer/installer.go | 4 ---- pkg/configurer/windows.go | 38 +++++++++++++++++++++++++------------ 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/pkg/configurer/common.go b/pkg/configurer/common.go index e991dc6a..acc50ee8 100644 --- a/pkg/configurer/common.go +++ b/pkg/configurer/common.go @@ -10,6 +10,10 @@ import ( "github.com/k0sproject/rig/os" ) +type rebootable interface { + Reboot() error +} + type DockerConfigurer struct{} // GetDockerInfo gets docker info from the host. diff --git a/pkg/configurer/installer.go b/pkg/configurer/installer.go index a1f977cc..fd9efb49 100644 --- a/pkg/configurer/installer.go +++ b/pkg/configurer/installer.go @@ -24,10 +24,6 @@ func GetInstaller(source string) (string, error) { return path, nil } - if path == "" { - return "", fmt.Errorf("%w; skipping failed installer download", ErrInstallerDownloadFailed) - } - path, getErr := downloadInstaller(source) if getErr != nil { return "", fmt.Errorf("%w, installer download failed; %s", ErrInstallerDownloadFailed, getErr.Error()) diff --git a/pkg/configurer/windows.go b/pkg/configurer/windows.go index 1fd5299f..2a52e5a5 100644 --- a/pkg/configurer/windows.go +++ b/pkg/configurer/windows.go @@ -39,10 +39,6 @@ func (c WindowsConfigurer) MCRConfigPath() string { return `C:\ProgramData\Docker\config\daemon.json` } -type rebootable interface { - Reboot() error -} - var errRebootRequired = fmt.Errorf("reboot required") // InstallMCRLicense for license install.. @@ -88,23 +84,41 @@ func (c WindowsConfigurer) InstallMCR(h os.Host, engineConfig commonconfig.MCRCo log.Infof("%s: running installer", h) output, err := h.ExecOutput(installCommand) + + needsReboot := false if err != nil { - return fmt.Errorf("failed to run MCR installer: %w", err) + if isExitCode3010(err) { + needsReboot = true + } else { + return fmt.Errorf("failed to run MCR installer: %w", err) + } } - if strings.Contains(output, "Your machine needs to be rebooted") { - log.Warnf("%s: host needs to be rebooted", h) - if rh, ok := h.(rebootable); ok { - if err := rh.Reboot(); err != nil { - return fmt.Errorf("%s: failed to reboot host: %w", h, err) - } + if !needsReboot && strings.Contains(output, "Your machine needs to be rebooted") { + needsReboot = true + } + + if needsReboot { + log.Warnf("%s: host needs to be rebooted after MCR install", h) + rh, ok := h.(rebootable) + if !ok { + return fmt.Errorf("%s: %w: host does not support reboot", h, errRebootRequired) + } + if err := rh.Reboot(); err != nil { + return fmt.Errorf("%s: failed to reboot host: %w", h, err) } - return fmt.Errorf("%s: %w: host isn't rebootable", h, errRebootRequired) + return nil } return nil } +// isExitCode3010 checks if the error is a command failure with Windows exit +// code 3010 (ERROR_SUCCESS_REBOOT_REQUIRED). +func isExitCode3010(err error) bool { + return err != nil && strings.Contains(err.Error(), "3010") +} + // UninstallMCR uninstalls docker-ee engine // This relies on using the http://get.mirantis.com/install.ps1 script with the '-Uninstall' option, and some cleanup as per // https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-docker/configure-docker-daemon#how-to-uninstall-docker From 65d33a49b25d7dbf03648b1c81bc91448206a761 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Fri, 24 Apr 2026 16:45:37 +0300 Subject: [PATCH 02/10] windows configurer: override Reboot with Restart-Computer -Force rig's Windows Reboot implementation uses 'shutdown /r /t 5' which is silently ignored when Windows has a pending-reboot state (e.g. after an MCR install that exits 3010). Override Reboot() on WindowsConfigurer to use PowerShell's Restart-Computer -Force which bypasses pending-reboot locks and reliably triggers the restart. TODO: move this fix upstream into k0sproject/rig. --- pkg/configurer/windows.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/configurer/windows.go b/pkg/configurer/windows.go index 2a52e5a5..f47f6973 100644 --- a/pkg/configurer/windows.go +++ b/pkg/configurer/windows.go @@ -119,6 +119,19 @@ func isExitCode3010(err error) bool { return err != nil && strings.Contains(err.Error(), "3010") } +// Reboot issues a forced restart via PowerShell's Restart-Computer, which +// reliably reboots Windows hosts even when a reboot is already pending (e.g. +// after an MCR install that exits with code 3010). The rig implementation uses +// 'shutdown /r /t 5' which can be silently ignored in that state. +// +// TODO: move this fix upstream into the k0sproject/rig Windows configurer. +func (c WindowsConfigurer) Reboot(h os.Host) error { + if err := h.Exec(`powershell -Command "Restart-Computer -Force"`); err != nil { + return fmt.Errorf("failed to reboot: %w", err) + } + return nil +} + // UninstallMCR uninstalls docker-ee engine // This relies on using the http://get.mirantis.com/install.ps1 script with the '-Uninstall' option, and some cleanup as per // https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-docker/configure-docker-daemon#how-to-uninstall-docker From 66dd9eff74750d283e6dff5ec45002f2a734e82e Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Fri, 24 Apr 2026 19:26:16 +0300 Subject: [PATCH 03/10] windows configurer: sleep 15s after shutdown /r /t 0 before waitForHost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shutdown /r /t 0 returns immediately (exit 0) before Windows has begun tearing down its network stack. waitForHost starts polling right after Reboot() returns, so all 60 echo probes (3s apart) succeed before the host ever drops WinRM — the offline window is never observed. Adding a 15-second sleep gives Windows time to start its shutdown sequence so the subsequent waitForHost(false) poll loop actually catches the host going offline. --- pkg/configurer/windows.go | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/pkg/configurer/windows.go b/pkg/configurer/windows.go index f47f6973..4e5a6912 100644 --- a/pkg/configurer/windows.go +++ b/pkg/configurer/windows.go @@ -119,16 +119,34 @@ func isExitCode3010(err error) bool { return err != nil && strings.Contains(err.Error(), "3010") } -// Reboot issues a forced restart via PowerShell's Restart-Computer, which +// Reboot issues an immediate forced restart via shutdown /r /t 0, which // reliably reboots Windows hosts even when a reboot is already pending (e.g. // after an MCR install that exits with code 3010). The rig implementation uses // 'shutdown /r /t 5' which can be silently ignored in that state. // +// After issuing the command we sleep briefly so that Windows has time to begin +// its shutdown sequence before the caller's waitForHost poll loop starts. +// Without this delay the host may return WinRM responses for several seconds +// after shutdown /r /t 0 returns, causing waitForHost to never see an offline +// window. +// +// The WinRM session may be forcibly terminated by the OS during shutdown, +// causing the exec to return an error. We tolerate that by also accepting +// errors whose message contains "connection" or "closed" — those indicate +// the reboot is in progress. +// // TODO: move this fix upstream into the k0sproject/rig Windows configurer. func (c WindowsConfigurer) Reboot(h os.Host) error { - if err := h.Exec(`powershell -Command "Restart-Computer -Force"`); err != nil { - return fmt.Errorf("failed to reboot: %w", err) + if err := h.Exec("shutdown /r /t 0"); err != nil { + // The OS may kill the WinRM session before the command returns; + // treat connection-level errors as success since the reboot is underway. + errMsg := err.Error() + if !strings.Contains(errMsg, "connection") && !strings.Contains(errMsg, "closed") && !strings.Contains(errMsg, "EOF") { + return fmt.Errorf("failed to reboot: %w", err) + } } + // Allow Windows time to start shutting down before waitForHost begins polling. + time.Sleep(15 * time.Second) return nil } From 5c9d2bf52cee8600cb9addc843be95b9aa5cd048 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Fri, 24 Apr 2026 19:57:20 +0300 Subject: [PATCH 04/10] windows configurer: use shutdown /r /f /t 0 to force-close applications The /f flag forces running applications to close, which is necessary on Windows Server 2025 where processes can block a pending reboot. Without it, 'shutdown /r /t 0' completes but the host never actually reboots. --- pkg/configurer/windows.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/configurer/windows.go b/pkg/configurer/windows.go index 4e5a6912..ec851ac1 100644 --- a/pkg/configurer/windows.go +++ b/pkg/configurer/windows.go @@ -137,7 +137,7 @@ func isExitCode3010(err error) bool { // // TODO: move this fix upstream into the k0sproject/rig Windows configurer. func (c WindowsConfigurer) Reboot(h os.Host) error { - if err := h.Exec("shutdown /r /t 0"); err != nil { + if err := h.Exec("shutdown /r /f /t 0"); err != nil { // The OS may kill the WinRM session before the command returns; // treat connection-level errors as success since the reboot is underway. errMsg := err.Error() From 492b07d7c5a77457f64ace23c5d09a747a378b3f Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Fri, 24 Apr 2026 20:20:21 +0300 Subject: [PATCH 05/10] windows configurer: reboot via SYSTEM scheduled task AWS EC2 WinRM sessions run under a filtered Administrator token that lacks SeShutdownPrivilege. 'shutdown /r /f /t 0' succeeds (exit 0) but is silently ignored because the token has insufficient privilege. Fix: create a one-shot scheduled task running as SYSTEM (which always holds SeShutdownPrivilege) and trigger it immediately. SYSTEM-context tasks bypass the WinRM token restriction and reliably trigger the reboot. --- pkg/configurer/windows.go | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/pkg/configurer/windows.go b/pkg/configurer/windows.go index ec851ac1..64531a0e 100644 --- a/pkg/configurer/windows.go +++ b/pkg/configurer/windows.go @@ -119,30 +119,32 @@ func isExitCode3010(err error) bool { return err != nil && strings.Contains(err.Error(), "3010") } -// Reboot issues an immediate forced restart via shutdown /r /t 0, which -// reliably reboots Windows hosts even when a reboot is already pending (e.g. -// after an MCR install that exits with code 3010). The rig implementation uses -// 'shutdown /r /t 5' which can be silently ignored in that state. +// Reboot triggers an immediate forced restart by scheduling a one-shot SYSTEM +// task that runs 'shutdown /r /f /t 0'. Running via a scheduled task bypasses +// the filtered Administrator token used by WinRM sessions, which lacks +// SeShutdownPrivilege on AWS EC2 instances. The rig implementation uses +// 'shutdown /r /t 5' directly in the WinRM session, which is silently ignored +// in that context. // -// After issuing the command we sleep briefly so that Windows has time to begin +// After scheduling the task we sleep briefly so that Windows has time to begin // its shutdown sequence before the caller's waitForHost poll loop starts. -// Without this delay the host may return WinRM responses for several seconds -// after shutdown /r /t 0 returns, causing waitForHost to never see an offline -// window. -// -// The WinRM session may be forcibly terminated by the OS during shutdown, -// causing the exec to return an error. We tolerate that by also accepting -// errors whose message contains "connection" or "closed" — those indicate -// the reboot is in progress. // // TODO: move this fix upstream into the k0sproject/rig Windows configurer. func (c WindowsConfigurer) Reboot(h os.Host) error { - if err := h.Exec("shutdown /r /f /t 0"); err != nil { - // The OS may kill the WinRM session before the command returns; - // treat connection-level errors as success since the reboot is underway. + const taskName = "LaunchpadReboot" + // Create (or overwrite) a one-shot scheduled task running as SYSTEM, then + // trigger it immediately. SYSTEM always holds SeShutdownPrivilege. + create := fmt.Sprintf(`schtasks /create /tn "%s" /tr "shutdown /r /f /t 0" /sc once /st 00:00 /f /ru SYSTEM`, taskName) + if err := h.Exec(create); err != nil { + return fmt.Errorf("failed to create reboot task: %w", err) + } + run := fmt.Sprintf(`schtasks /run /tn "%s"`, taskName) + if err := h.Exec(run); err != nil { + // Tolerate connection-level errors; the OS may kill WinRM as it starts + // rebooting before the run command returns. errMsg := err.Error() if !strings.Contains(errMsg, "connection") && !strings.Contains(errMsg, "closed") && !strings.Contains(errMsg, "EOF") { - return fmt.Errorf("failed to reboot: %w", err) + return fmt.Errorf("failed to run reboot task: %w", err) } } // Allow Windows time to start shutting down before waitForHost begins polling. From fdea0123f0ec864ded6776b6c8bd404601d90fa3 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Sat, 25 Apr 2026 08:52:28 +0300 Subject: [PATCH 06/10] windows configurer: use /sc onstart to avoid schtasks stderr warning /sc once /st 00:00 causes schtasks to write a warning to stderr when the scheduled time is in the past. Rig treats any stderr output as an error, causing Reboot() to fail even though the task was created successfully. /sc onstart requires no start time and creates the task silently. --- pkg/configurer/windows.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pkg/configurer/windows.go b/pkg/configurer/windows.go index 64531a0e..78832a9a 100644 --- a/pkg/configurer/windows.go +++ b/pkg/configurer/windows.go @@ -119,22 +119,26 @@ func isExitCode3010(err error) bool { return err != nil && strings.Contains(err.Error(), "3010") } -// Reboot triggers an immediate forced restart by scheduling a one-shot SYSTEM -// task that runs 'shutdown /r /f /t 0'. Running via a scheduled task bypasses -// the filtered Administrator token used by WinRM sessions, which lacks -// SeShutdownPrivilege on AWS EC2 instances. The rig implementation uses -// 'shutdown /r /t 5' directly in the WinRM session, which is silently ignored -// in that context. +// Reboot triggers an immediate forced restart by scheduling a SYSTEM-context +// task that runs 'shutdown /r /f /t 0', then immediately triggering it. +// Running via a scheduled task bypasses the filtered Administrator token used +// by WinRM sessions on AWS EC2, which lacks SeShutdownPrivilege. The rig +// implementation uses 'shutdown /r /t 5' directly in the WinRM session, +// which is silently ignored in that context. // -// After scheduling the task we sleep briefly so that Windows has time to begin -// its shutdown sequence before the caller's waitForHost poll loop starts. +// /sc onstart is used instead of /sc once to avoid schtasks writing a +// stderr warning about the start time being in the past, which rig treats +// as an error. +// +// After scheduling the task we sleep briefly so that Windows has time to +// begin its shutdown sequence before the caller's waitForHost poll loop starts. // // TODO: move this fix upstream into the k0sproject/rig Windows configurer. func (c WindowsConfigurer) Reboot(h os.Host) error { const taskName = "LaunchpadReboot" // Create (or overwrite) a one-shot scheduled task running as SYSTEM, then // trigger it immediately. SYSTEM always holds SeShutdownPrivilege. - create := fmt.Sprintf(`schtasks /create /tn "%s" /tr "shutdown /r /f /t 0" /sc once /st 00:00 /f /ru SYSTEM`, taskName) + create := fmt.Sprintf(`schtasks /create /tn "%s" /tr "shutdown /r /f /t 0" /sc onstart /f /ru SYSTEM`, taskName) if err := h.Exec(create); err != nil { return fmt.Errorf("failed to create reboot task: %w", err) } From ccd441bc15e4323eb725370b66055cc6a774390d Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Mon, 27 Apr 2026 10:41:06 +0300 Subject: [PATCH 07/10] docker: fix deadlock in PullImages worker goroutines Each worker locked mutex at entry (line 106) and deferred unlock (line 107), then attempted a second mutex.Lock() on the error path (line 114). The second lock deadlocked the goroutine since it already held the mutex. workerpool.StopWait() then blocked forever waiting for the deadlocked worker to finish. Fix: remove the outer lock/defer and only lock when recording an error, using an early-return guard so only the first error is kept. --- pkg/docker/image.go | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pkg/docker/image.go b/pkg/docker/image.go index d9275c0e..b8cc19cd 100644 --- a/pkg/docker/image.go +++ b/pkg/docker/image.go @@ -103,16 +103,12 @@ func PullImages(h *mkeconfig.Host, images []*Image) error { for _, image := range images { i := image // So we can safely pass i forward to pool without it getting mutated wp.Submit(func() { - mutex.Lock() - defer mutex.Unlock() - if lastError != nil { - return - } - - err := i.Pull(h) - if err != nil { + if err := i.Pull(h); err != nil { mutex.Lock() - lastError = err + if lastError == nil { + lastError = err + } + mutex.Unlock() } }) } From c8c44a28a511f33581694acf41aafa4365ca1ed0 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Mon, 27 Apr 2026 12:03:16 +0300 Subject: [PATCH 08/10] windows configurer: delete LaunchpadReboot task after reboot completes The schtask is scheduled with /sc onstart, meaning it fires on every system startup. Without cleanup, the task triggers a second reboot when the machine comes back up after the MCR-install reboot, causing docker swarm join (and any subsequent operation) to fail because the host reboots again mid-flight. Delete the task immediately after rh.Reboot() returns (machine is back up and WinRM is reconnected) to prevent it from firing on subsequent startups. --- pkg/configurer/windows.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/configurer/windows.go b/pkg/configurer/windows.go index 78832a9a..8d1a3772 100644 --- a/pkg/configurer/windows.go +++ b/pkg/configurer/windows.go @@ -107,6 +107,11 @@ func (c WindowsConfigurer) InstallMCR(h os.Host, engineConfig commonconfig.MCRCo if err := rh.Reboot(); err != nil { return fmt.Errorf("%s: failed to reboot host: %w", h, err) } + // Machine is back up. Delete the ONSTART scheduled task so it does not + // trigger another reboot on subsequent startups. + if err := h.Exec(`schtasks /delete /tn "LaunchpadReboot" /f`); err != nil { + log.Warnf("%s: failed to clean up LaunchpadReboot task: %s", h, err) + } return nil } From 08443f002087ed22607b808ffe1eb4cdf5717b64 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Mon, 27 Apr 2026 12:20:59 +0300 Subject: [PATCH 09/10] windows configurer: delete LaunchpadReboot task before reboot fires The ONSTART schtask fires on every startup, causing repeated reboots. The post-reboot cleanup in InstallMCR is too late -- the task has already triggered a second reboot by the time the cleanup runs. Fix: use /t 5 (5-second countdown) so the task can be deleted immediately after it is triggered but before the OS actually executes the shutdown. This prevents the task from re-firing on subsequent startups. The post-reboot cleanup in InstallMCR is kept as a fallback in case the pre-delete fails (e.g. the WinRM session is dropped in the 5s window). --- pkg/configurer/windows.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pkg/configurer/windows.go b/pkg/configurer/windows.go index 8d1a3772..b98e1035 100644 --- a/pkg/configurer/windows.go +++ b/pkg/configurer/windows.go @@ -141,9 +141,11 @@ func isExitCode3010(err error) bool { // TODO: move this fix upstream into the k0sproject/rig Windows configurer. func (c WindowsConfigurer) Reboot(h os.Host) error { const taskName = "LaunchpadReboot" - // Create (or overwrite) a one-shot scheduled task running as SYSTEM, then - // trigger it immediately. SYSTEM always holds SeShutdownPrivilege. - create := fmt.Sprintf(`schtasks /create /tn "%s" /tr "shutdown /r /f /t 0" /sc onstart /f /ru SYSTEM`, taskName) + // Create a SYSTEM-context ONSTART task that runs 'shutdown /r /f /t 5'. + // The 5-second delay gives us time to delete the task before the OS + // actually executes the reboot, preventing it from firing again on the + // next startup. + create := fmt.Sprintf(`schtasks /create /tn "%s" /tr "shutdown /r /f /t 5" /sc onstart /f /ru SYSTEM`, taskName) if err := h.Exec(create); err != nil { return fmt.Errorf("failed to create reboot task: %w", err) } @@ -156,7 +158,15 @@ func (c WindowsConfigurer) Reboot(h os.Host) error { return fmt.Errorf("failed to run reboot task: %w", err) } } - // Allow Windows time to start shutting down before waitForHost begins polling. + // Delete the task immediately while the 5-second shutdown timer is still + // counting down. This prevents it from re-firing on subsequent startups. + del := fmt.Sprintf(`schtasks /delete /tn "%s" /f`, taskName) + if err := h.Exec(del); err != nil { + // Best-effort: warn but don't fail — the post-reboot cleanup in + // InstallMCR will attempt deletion again once the host is back up. + log.Warnf("%v: failed to pre-delete reboot task (will retry after reboot): %s", h, err) + } + // Allow Windows time to complete shutdown before waitForHost begins polling. time.Sleep(15 * time.Second) return nil } From 78d209d6421dc6c3c3949d008c65080718162a17 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Tue, 28 Apr 2026 10:17:32 +0300 Subject: [PATCH 10/10] Address Copilot PR review comments pkg/docker/image.go: - Fix StopWait() race: call wp.StopWait() explicitly before reading lastError instead of deferring it. A deferred StopWait() evaluates the return expression before workers finish, potentially returning nil when a worker later records an error. pkg/configurer/windows.go: - isExitCode3010: tighten match string from '3010' to 'non-zero exit code: 3010' to avoid false positives on error messages that incidentally contain those digits. - Reboot: fix comment '/t 0' -> '/t 5' to match the actual command. - Reboot: update top-level comment to describe the actual schtask mechanism; remove stale reference to Restart-Computer -Force from an earlier iteration. --- pkg/configurer/windows.go | 20 +++++++++++--------- pkg/docker/image.go | 5 ++++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pkg/configurer/windows.go b/pkg/configurer/windows.go index b98e1035..77ebc024 100644 --- a/pkg/configurer/windows.go +++ b/pkg/configurer/windows.go @@ -121,22 +121,24 @@ func (c WindowsConfigurer) InstallMCR(h os.Host, engineConfig commonconfig.MCRCo // isExitCode3010 checks if the error is a command failure with Windows exit // code 3010 (ERROR_SUCCESS_REBOOT_REQUIRED). func isExitCode3010(err error) bool { - return err != nil && strings.Contains(err.Error(), "3010") + return err != nil && strings.Contains(err.Error(), "non-zero exit code: 3010") } // Reboot triggers an immediate forced restart by scheduling a SYSTEM-context -// task that runs 'shutdown /r /f /t 0', then immediately triggering it. +// one-shot task that runs 'shutdown /r /f /t 5', then immediately triggering +// and deleting it within the 5-second countdown window. +// // Running via a scheduled task bypasses the filtered Administrator token used -// by WinRM sessions on AWS EC2, which lacks SeShutdownPrivilege. The rig -// implementation uses 'shutdown /r /t 5' directly in the WinRM session, -// which is silently ignored in that context. +// by WinRM sessions on AWS EC2, which lacks SeShutdownPrivilege. Issuing +// 'shutdown /r' directly in the WinRM session is silently ignored in that +// context. // // /sc onstart is used instead of /sc once to avoid schtasks writing a // stderr warning about the start time being in the past, which rig treats -// as an error. -// -// After scheduling the task we sleep briefly so that Windows has time to -// begin its shutdown sequence before the caller's waitForHost poll loop starts. +// as an error. The task is deleted immediately after triggering (while the +// 5-second timer counts down) so it does not re-fire on subsequent startups. +// A post-reboot cleanup in InstallMCR provides a second deletion attempt as +// a fallback. // // TODO: move this fix upstream into the k0sproject/rig Windows configurer. func (c WindowsConfigurer) Reboot(h os.Host) error { diff --git a/pkg/docker/image.go b/pkg/docker/image.go index b8cc19cd..312764ea 100644 --- a/pkg/docker/image.go +++ b/pkg/docker/image.go @@ -95,7 +95,6 @@ func (i *Image) Exist(h *mkeconfig.Host) bool { // PullImages pulls multiple images parallelly by using a worker pool. func PullImages(h *mkeconfig.Host, images []*Image) error { wp := workerpool.New(5) - defer wp.StopWait() var mutex sync.Mutex var lastError error @@ -113,6 +112,10 @@ func PullImages(h *mkeconfig.Host, images []*Image) error { }) } + // Wait for all workers to complete before reading lastError. + // A deferred StopWait() would let the return expression evaluate + // before workers finish, potentially returning nil on a real error. + wp.StopWait() return lastError }