Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions install.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ function Stop-GitAiManagedProcesses {
$pids = @($processes | Sort-Object ProcessId -Unique | Select-Object -ExpandProperty ProcessId)
Write-Warning ("Stopping lingering git-ai processes: {0}" -f ($pids -join ', '))

foreach ($pid in $pids) {
foreach ($managedPid in $pids) {
try {
Stop-Process -Id $pid -Force -ErrorAction Stop
Stop-Process -Id $managedPid -Force -ErrorAction Stop
} catch { }
}

Expand Down Expand Up @@ -521,6 +521,13 @@ try {
Write-Warning "Warning: Failed to set up IDE/agent hooks. Please try running 'git-ai install-hooks' manually."
}

# Best-effort restart so detached self-updates can bring the background service back.
try {
& $finalExe bg start *> $null
} catch {
Write-Warning "Warning: Failed to restart git-ai background service automatically."
}

# Update PATH so our shim takes precedence over any Git entries
$skipPathUpdate = $env:GIT_AI_SKIP_PATH_UPDATE -eq '1'
if ($skipPathUpdate) {
Expand Down
24 changes: 22 additions & 2 deletions src/commands/daemon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,34 @@ fn handle_run(args: &[String]) -> Result<(), String> {
.enable_all()
.build()
.map_err(|e| e.to_string())?;
runtime
let exit_action = runtime
.block_on(async move { crate::daemon::run_daemon(config).await })
.map_err(|e| e.to_string())?;

// Daemon is fully dead (lock released, sockets removed, threads joined).
// Now safe to self-update — install.sh can start a fresh daemon.
// Now safe to self-update — the daemon can decide whether a fresh instance
// should be started based on the shutdown reason and install outcome.
#[cfg(not(windows))]
let update_installed = crate::daemon::daemon_run_pending_self_update();

#[cfg(windows)]
crate::daemon::daemon_run_pending_self_update();

match exit_action {
crate::daemon::DaemonExitAction::Stop => {}
crate::daemon::DaemonExitAction::Restart => {
ensure_daemon_running(Duration::from_secs(5)).map(|_| ())?;
}
crate::daemon::DaemonExitAction::RestartAfterUpdate => {
#[cfg(not(windows))]
{
if update_installed {
ensure_daemon_running(Duration::from_secs(5)).map(|_| ())?;
}
}
}
}

Ok(())
}

Expand Down
2 changes: 1 addition & 1 deletion src/commands/upgrade.rs
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ pub fn run_with_args(args: &[String]) {
}

fn run_impl(force: bool, background: bool) {
let config = config::Config::get();
let config = config::Config::fresh();
let channel = config.update_channel();
let skip_install = background && config.auto_updates_disabled();
let _ = run_impl_with_url(force, config.api_base_url(), channel, skip_install);
Expand Down
6 changes: 3 additions & 3 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1691,10 +1691,10 @@ mod tests {
let dir = tempfile::tempdir().unwrap();
let git_ai = dir.path().join("git-ai");
fs::write(&git_ai, "fake-binary").unwrap();
let git = dir.path().join("git");
let _git = dir.path().join("git");
#[cfg(unix)]
fs::hard_link(&git_ai, &git).unwrap();
fs::hard_link(&git_ai, &_git).unwrap();
#[cfg(unix)]
assert!(path_is_git_ai_binary(&git));
assert!(path_is_git_ai_binary(&_git));
}
}
91 changes: 84 additions & 7 deletions src/daemon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ use std::io::{BufRead, BufReader, Read, Seek, SeekFrom, Write};
#[cfg(windows)]
use std::os::windows::io::{AsRawHandle, FromRawHandle, IntoRawHandle};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::sync::{Mutex as AsyncMutex, Notify, mpsc, oneshot};
Expand Down Expand Up @@ -3659,6 +3659,7 @@ struct ActorDaemonCoordinator {
wrapper_states: Mutex<HashMap<String, WrapperStateEntry>>,
wrapper_state_notify: Notify,
shutting_down: AtomicBool,
shutdown_action: AtomicU8,
shutdown_notify: Notify,
shutdown_condvar: std::sync::Condvar,
shutdown_condvar_mutex: Mutex<()>,
Expand All @@ -3670,6 +3671,31 @@ struct WrapperStateEntry {
received_at_ns: u128,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum DaemonExitAction {
Stop,
Restart,
RestartAfterUpdate,
}

impl DaemonExitAction {
fn as_u8(self) -> u8 {
match self {
Self::Stop => 0,
Self::Restart => 1,
Self::RestartAfterUpdate => 2,
}
}

fn from_u8(value: u8) -> Self {
match value {
1 => Self::Restart,
2 => Self::RestartAfterUpdate,
_ => Self::Stop,
}
}
}

enum TracePayloadApplyOutcome {
None,
Applied(Box<crate::daemon::domain::AppliedCommand>),
Expand Down Expand Up @@ -3720,6 +3746,7 @@ impl ActorDaemonCoordinator {
wrapper_states: Mutex::new(HashMap::new()),
wrapper_state_notify: Notify::new(),
shutting_down: AtomicBool::new(false),
shutdown_action: AtomicU8::new(DaemonExitAction::Stop.as_u8()),
shutdown_notify: Notify::new(),
shutdown_condvar: std::sync::Condvar::new(),
shutdown_condvar_mutex: Mutex::new(()),
Expand All @@ -3745,6 +3772,30 @@ impl ActorDaemonCoordinator {
self.shutdown_condvar.notify_all();
}
Comment on lines 3772 to 3773
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Explicit Shutdown control request does not reset shutdown_action, causing unintended restart

When the update check loop or uptime check calls request_restart() or request_restart_after_update(), they store a non-Stop value in shutdown_action before calling request_shutdown(). However, an explicit ControlRequest::Shutdown (e.g. from git-ai bg shutdown) at src/daemon.rs:7389 only calls request_shutdown(), which never resets shutdown_action back to Stop. If the timing aligns (update/uptime check sets a restart intent, then user sends an explicit shutdown before the daemon finishes draining), the daemon will still attempt to restart in src/commands/daemon.rs:199-200 despite the explicit stop request. The request_shutdown method at src/daemon.rs:3760 should reset shutdown_action to DaemonExitAction::Stop so that an explicit shutdown always overrides a previously-set restart intent.

(Refers to lines 3760-3773)

Prompt for agents
The problem is in ActorDaemonCoordinator::request_shutdown (src/daemon.rs around line 3760). This method sets shutting_down to true and notifies waiters, but it does NOT reset shutdown_action to DaemonExitAction::Stop. The request_restart and request_restart_after_update methods store a non-Stop value in shutdown_action BEFORE calling request_shutdown. An explicit ControlRequest::Shutdown (processed at src/daemon.rs:7389) calls request_shutdown() which preserves the restart intent.

To fix: request_shutdown should reset shutdown_action to Stop, ensuring that any explicit shutdown overrides a previously set restart intent. The request_restart and request_restart_after_update methods already store their desired action before calling request_shutdown, so the ordering needs to be: request_restart stores the action, then request_shutdown would normally reset it — this won't work.

Alternative approach: Instead of modifying request_shutdown, add a separate request_stop method that resets shutdown_action to Stop and then calls request_shutdown, and use it from the ControlRequest::Shutdown handler at line 7389. Or, reverse the order in request_restart/request_restart_after_update to call request_shutdown first and then store the action (using a compare-and-swap to avoid overwriting a Stop that was set by an explicit shutdown). The cleanest approach is probably to make the Shutdown control handler explicitly store DaemonExitAction::Stop before calling request_shutdown.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Patched this by adding an explicit request_stop() path and using it for ControlRequest::Shutdown, so a user-initiated stop now overrides any previously queued restart or restart-after-update intent. I also reran the daemon restart/update lifecycle tests locally after the change.


fn request_stop(&self) {
self.shutdown_action
.store(DaemonExitAction::Stop.as_u8(), Ordering::SeqCst);
self.request_shutdown();
}

fn request_restart(&self) {
self.shutdown_action
.store(DaemonExitAction::Restart.as_u8(), Ordering::SeqCst);
self.request_shutdown();
}

fn request_restart_after_update(&self) {
self.shutdown_action.store(
DaemonExitAction::RestartAfterUpdate.as_u8(),
Ordering::SeqCst,
);
self.request_shutdown();
}

fn shutdown_action(&self) -> DaemonExitAction {
DaemonExitAction::from_u8(self.shutdown_action.load(Ordering::SeqCst))
}

async fn wait_for_shutdown(&self) {
// Register the Notified future BEFORE checking the flag so that a
// request_shutdown() racing between the check and the await cannot
Expand Down Expand Up @@ -7341,7 +7392,7 @@ fn handle_control_connection_actor_reader<R: Read + Write>(
reader.get_mut().write_all(b"\n")?;
reader.get_mut().flush()?;
if shutdown_after_response {
coordinator.request_shutdown();
coordinator.request_stop();
}
}
Ok(())
Expand Down Expand Up @@ -7635,7 +7686,7 @@ fn daemon_update_check_loop(coordinator: Arc<ActorDaemonCoordinator>, started_at
match check_for_update_available() {
Ok(DaemonUpdateCheckResult::UpdateReady) => {
debug_log("daemon update check: newer version available, requesting shutdown");
coordinator.request_shutdown();
coordinator.request_restart_after_update();
return;
}
Ok(DaemonUpdateCheckResult::NoUpdate) => {
Expand All @@ -7649,7 +7700,7 @@ fn daemon_update_check_loop(coordinator: Arc<ActorDaemonCoordinator>, started_at
let uptime_ns = now_unix_nanos().saturating_sub(started_at_ns);
if uptime_ns >= daemon_max_uptime_ns() {
debug_log("daemon uptime exceeded max, requesting restart");
coordinator.request_shutdown();
coordinator.request_restart();
return;
}
}
Expand All @@ -7660,25 +7711,28 @@ fn daemon_update_check_loop(coordinator: Arc<ActorDaemonCoordinator>, started_at
/// On Unix the installer atomically replaces the binary via `mv`; on Windows
/// the installer is spawned as a detached process that polls until the exe is
/// unlocked.
pub(crate) fn daemon_run_pending_self_update() {
pub(crate) fn daemon_run_pending_self_update() -> bool {
use crate::commands::upgrade::{
DaemonUpdateCheckResult, check_and_install_update_if_available,
};

match check_and_install_update_if_available() {
Ok(DaemonUpdateCheckResult::UpdateReady) => {
debug_log("daemon self-update: installation completed successfully");
true
}
Ok(DaemonUpdateCheckResult::NoUpdate) => {
debug_log("daemon self-update: no update to install");
false
}
Err(err) => {
debug_log(&format!("daemon self-update: installation failed: {}", err));
false
}
}
}

pub async fn run_daemon(config: DaemonConfig) -> Result<(), GitAiError> {
pub(crate) async fn run_daemon(config: DaemonConfig) -> Result<DaemonExitAction, GitAiError> {
sanitize_git_env_for_daemon();
disable_trace2_for_daemon_process();
config.ensure_parent_dirs()?;
Expand Down Expand Up @@ -7771,7 +7825,7 @@ pub async fn run_daemon(config: DaemonConfig) -> Result<(), GitAiError> {
remove_socket_if_exists(&config.control_socket_path)?;
remove_pid_metadata(&config)?;

Ok(())
Ok(coordinator.shutdown_action())
}

fn checkpoint_control_timeout_uses_ci_or_test_budget() -> bool {
Expand Down Expand Up @@ -8246,6 +8300,29 @@ mod tests {
assert_eq!(normalized.get("example.txt"), carryover.get("example.txt"));
}

#[test]
fn explicit_stop_overrides_prior_restart_intent() {
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();

runtime.block_on(async {
let coordinator = ActorDaemonCoordinator::new();

coordinator.request_restart_after_update();
assert_eq!(
coordinator.shutdown_action(),
DaemonExitAction::RestartAfterUpdate
);

coordinator.request_stop();

assert!(coordinator.is_shutting_down());
assert_eq!(coordinator.shutdown_action(), DaemonExitAction::Stop);
});
}

#[test]
fn recent_working_log_snapshot_preserves_humans_on_restore() {
use crate::authorship::attribution_tracker::LineAttribution;
Expand Down
98 changes: 90 additions & 8 deletions tests/daemon_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4045,6 +4045,87 @@ fn spawn_daemon_with_env(repo: &TestRepo, extra_env: &[(&str, String)]) -> Child
panic!("daemon did not become ready");
}

/// When the daemon exceeds its configured max uptime, it should restart and
/// make a fresh control socket available.
#[test]
#[serial]
fn daemon_max_uptime_restarts_and_recovers() {
let repo = TestRepo::new_with_mode(GitTestMode::Wrapper);

let mut child = spawn_daemon_with_env(
&repo,
&[
("GIT_AI_DAEMON_UPDATE_CHECK_INTERVAL", "1".to_string()),
("GIT_AI_DAEMON_MAX_UPTIME_SECS", "5".to_string()),
],
);

let original_pid = child.id();
let deadline = std::time::Instant::now() + Duration::from_secs(30);
loop {
if let Some(status) = child.try_wait().expect("failed to poll daemon") {
assert!(
status.success(),
"daemon should exit cleanly before restart, got: {}",
status
);
break;
}
if std::time::Instant::now() >= deadline {
let _ = child.kill();
let _ = child.wait();
panic!("daemon did not exit within 30s after exceeding max uptime");
}
thread::sleep(Duration::from_millis(100));
}

let control_socket_path = daemon_control_socket_path(&repo);
let trace_socket_path = daemon_trace_socket_path(&repo);
let repo_workdir = repo_workdir_string(&repo);
let restarted_pid = loop {
if send_control_request(
&control_socket_path,
&ControlRequest::StatusFamily {
repo_working_dir: repo_workdir.clone(),
},
)
.is_ok()
&& local_socket_connects_with_timeout(&trace_socket_path, DAEMON_TEST_PROBE_TIMEOUT)
.is_ok()
{
let daemon_config = DaemonConfig::from_home(&repo.daemon_home_path());
let pid = read_daemon_pid(&daemon_config)
.expect("restarted daemon should publish pid metadata");
break pid;
}
if std::time::Instant::now() >= deadline {
panic!("daemon did not restart within 30s after max uptime exit");
}
thread::sleep(Duration::from_millis(100));
};

assert_ne!(
restarted_pid, original_pid,
"daemon restart should produce a new process id"
);

let _ = send_control_request(&control_socket_path, &ControlRequest::Shutdown);
for _ in 0..200 {
if send_control_request(
&control_socket_path,
&ControlRequest::StatusFamily {
repo_working_dir: repo_workdir.clone(),
},
)
.is_err()
{
return;
}
thread::sleep(Duration::from_millis(25));
}
panic!("restarted daemon did not shut down cleanly");
}

/// Config patch JSON that enables version checks and auto-updates (they
/// default to disabled in non-OSS debug builds).
fn update_enabled_config_patch() -> String {
Expand All @@ -4058,6 +4139,12 @@ fn update_enabled_config_patch() -> String {
/// Verifies the daemon update check loop lifecycle: when a cached update is
/// present and the check interval is short, the daemon should detect the
/// pending update, request a graceful shutdown, and exit on its own.
///
/// The test seeds only the on-disk "update available" cache entry; it does not
/// stand up a mock release server. The follow-on self-update attempt may
/// therefore fail after the daemon has already decided to shut down, so the
/// contract we care about here is the shutdown lifecycle rather than a
/// zero-exit install result.
#[test]
#[serial]
fn daemon_update_check_loop_detects_cached_update_and_shuts_down() {
Expand All @@ -4077,20 +4164,15 @@ fn daemon_update_check_loop_detects_cached_update_and_shuts_down() {
// The daemon should self-shutdown after detecting the cached update.
// With a 1-second interval the tick is clamped to 1s, so it should
// exit within a few seconds.
let deadline = std::time::Instant::now() + Duration::from_secs(15);
let deadline = std::time::Instant::now() + Duration::from_secs(30);
loop {
if let Some(status) = child.try_wait().expect("failed to poll daemon") {
assert!(
status.success(),
"daemon should exit cleanly after update-triggered shutdown, got: {}",
status
);
if child.try_wait().expect("failed to poll daemon").is_some() {
break;
}
if std::time::Instant::now() >= deadline {
let _ = child.kill();
let _ = child.wait();
panic!("daemon did not self-shutdown within 15s after detecting cached update");
panic!("daemon did not self-shutdown within 30s after detecting cached update");
}
thread::sleep(Duration::from_millis(100));
}
Expand Down
Loading
Loading