Skip to content

Commit 2c0bbad

Browse files
fix: always capture service logs during startup and show on failure (#202)
1 parent 5a2115d commit 2c0bbad

15 files changed

Lines changed: 546 additions & 84 deletions

cmd/run.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,9 @@ func runTests(cmd *cobra.Command, args []string) error {
391391
var tests []runner.Test
392392
var err error
393393

394+
// Track overall timing for print mode (includes test loading)
395+
overallStart := time.Now()
396+
394397
// Step 3: Load tests - in cloud mode, fetch from backend; otherwise use local files
395398
deferLoadTests := interactive
396399
if deferLoadTests {
@@ -457,7 +460,7 @@ func runTests(cmd *cobra.Command, args []string) error {
457460
if isValidation {
458461
log.Stderrln(fmt.Sprintf("\n➤ Found %d traces to validate", len(tests)))
459462
} else if !cloud {
460-
log.Stderrln(fmt.Sprintf("\n➤ Loaded %d tests from local traces", len(tests)))
463+
log.Stderrln(fmt.Sprintf("\n➤ Loaded %d tests from local traces (%.1fs)", len(tests), time.Since(overallStart).Seconds()))
461464
}
462465
}
463466

@@ -732,13 +735,25 @@ func runTests(cmd *cobra.Command, args []string) error {
732735
}
733736

734737
// Step 4: Run tests by environment
738+
testPhaseStart := time.Now()
735739
var results []runner.TestResult
736740
if groupResult != nil && len(groupResult.Groups) > 0 {
737741
// Use environment-based replay
738742
results, err = runner.ReplayTestsByEnvironment(context.Background(), executor, groupResult.Groups)
739743
if err != nil {
740744
cmd.SilenceUsage = true
741745

746+
// Dump startup logs so user can diagnose startup failures
747+
startupLogs := executor.GetStartupLogs()
748+
if startupLogs != "" {
749+
log.Stderrln("\n📋 Service startup logs:")
750+
for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") {
751+
log.Stderrln(line)
752+
}
753+
log.Stderrln("")
754+
}
755+
log.Stderr(executor.GetStartupFailureHelpMessage())
756+
742757
// Update CI status to FAILURE if in cloud mode
743758
if cloud && client != nil && (ci || isValidation) {
744759
statusReq := &backend.UpdateDriftRunCIStatusRequest{
@@ -780,6 +795,14 @@ func runTests(cmd *cobra.Command, args []string) error {
780795
}
781796
}
782797

798+
startupLogs := executor.GetStartupLogs()
799+
if startupLogs != "" {
800+
log.Stderrln("\n📋 Service startup logs:")
801+
for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") {
802+
log.Stderrln(line)
803+
}
804+
log.Stderrln("")
805+
}
783806
log.Stderr(executor.GetStartupFailureHelpMessage())
784807
return fmt.Errorf("failed to start environment: %w", err)
785808
}
@@ -790,7 +813,7 @@ func runTests(cmd *cobra.Command, args []string) error {
790813
}()
791814

792815
if !interactive && !quiet {
793-
log.Stderrln(" ✓ Environment ready")
816+
log.Stderrln(fmt.Sprintf(" ✓ Environment ready (%.1fs)", time.Since(testPhaseStart).Seconds()))
794817
log.Stderrln(fmt.Sprintf("➤ Running %d tests (concurrency: %d)...\n", len(tests), executor.GetConcurrency()))
795818
}
796819

@@ -830,6 +853,10 @@ func runTests(cmd *cobra.Command, args []string) error {
830853
outputErr = runner.OutputResultsSummary(results, outputFormat, quiet)
831854
}
832855

856+
if !interactive && !quiet {
857+
log.Stderrln(fmt.Sprintf("Total elapsed: %.1fs", time.Since(overallStart).Seconds()))
858+
}
859+
833860
// Step 5: Upload results to backend if in cloud mode
834861
// Do this before returning any error so CI status is always updated
835862
if cloud && client != nil && (ci || isValidation) {

docs/tui-testing.md

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# CLI & TUI Testing Guide
2+
3+
How to manually test the tusk CLI in both print mode and interactive TUI mode.
4+
5+
## Print Mode Testing
6+
7+
Print mode (`--print`) runs headlessly — no interactive UI. Run it directly and inspect stderr:
8+
9+
```bash
10+
cd /path/to/test-project
11+
/path/to/tusk drift run --print 2>&1
12+
```
13+
14+
Filter for specific output:
15+
16+
```bash
17+
/path/to/tusk drift run --print 2>&1 | grep -E "(➤|✓|Tests:|Error:)"
18+
```
19+
20+
### Testing failure scenarios
21+
22+
To test startup failures, temporarily change the start command in `.tusk/config.yaml`:
23+
24+
```yaml
25+
start:
26+
command: node -e "console.log('boot log line'); console.error('some error'); process.exit(1)"
27+
```
28+
29+
To test with a service that starts but behaves differently, adjust the command or example codebase as needed.
30+
31+
**Always restore the config after testing.**
32+
33+
## TUI Testing with tmux
34+
35+
The TUI (interactive mode, no `--print`) requires a terminal. We use tmux for programmatic control — it lets us send keystrokes and capture output without needing to be in the terminal ourselves.
36+
37+
### Option A: Native screenshots (recommended)
38+
39+
Opens a real Terminal.app window with tmux inside it, then uses macOS `screencapture -l` to capture that specific window by ID. This produces pixel-perfect Retina screenshots and should always be used to verify TUI visual changes.
40+
41+
**One-time setup:** Grant Screen Recording permission to Terminal.app in System Settings > Privacy & Security > Screen Recording.
42+
43+
```bash
44+
# 1. Open Terminal.app with a tmux session
45+
osascript -e 'tell application "Terminal"
46+
do script "tmux new-session -s tui-test -x 200 -y 55"
47+
end tell'
48+
sleep 3
49+
50+
# 2. Resize the window to fill most of the screen (fits 14"/16" MacBook Pro)
51+
osascript -e 'tell application "Terminal" to set bounds of front window to {0, 25, 1700, 1100}'
52+
sleep 1
53+
54+
# 3. Hide tmux status bar (otherwise a green bar appears at the bottom)
55+
tmux set -t tui-test status off
56+
57+
# 4. Launch the TUI
58+
tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter
59+
60+
# 5. Wait for the state you want to capture
61+
# - Normal run with tests: ~25-30s (environment start + test execution)
62+
# - Startup failure with sandbox retry: ~15-18s
63+
# - Just initial render: ~3-5s
64+
sleep 25
65+
66+
# 6. Navigate if needed
67+
tmux send-keys -t tui-test g # go to top (select Service Logs)
68+
tmux send-keys -t tui-test j # move selection down
69+
tmux send-keys -t tui-test J # scroll log panel down
70+
tmux send-keys -t tui-test D # half-page down in log panel
71+
sleep 1
72+
73+
# 7. Find the Terminal.app window ID
74+
WINDOW_ID=$(python3 -c "
75+
import Quartz
76+
windows = Quartz.CGWindowListCopyWindowInfo(Quartz.kCGWindowListOptionOnScreenOnly, Quartz.kCGNullWindowID)
77+
for w in windows:
78+
if w.get('kCGWindowOwnerName') == 'Terminal' and w.get('kCGWindowLayer', 0) == 0:
79+
print(w['kCGWindowNumber'])
80+
break
81+
")
82+
83+
# 8. Capture the window
84+
screencapture -l "$WINDOW_ID" -o screenshot.png
85+
86+
# 9. Cleanup
87+
tmux send-keys -t tui-test q
88+
sleep 2
89+
tmux kill-session -t tui-test
90+
osascript -e 'tell application "Terminal" to close front window' 2>/dev/null
91+
```
92+
93+
**Output:** ~2800x1800 Retina PNG with native font rendering.
94+
95+
**Notes:**
96+
- `screencapture -l` captures by window ID — the Terminal window doesn't need to be in the foreground. You can keep working in other windows.
97+
- The `-o` flag removes the window shadow.
98+
- `screencapture -l` fails silently without Screen Recording permission — you get a blank or tiny image.
99+
- When finding the window ID, make sure to match `kCGWindowOwnerName == 'Terminal'` — other apps (Chrome, etc.) may be in front.
100+
101+
### Option B: Text capture (quick functional checks)
102+
103+
Uses a detached tmux session — no visible window, no permissions needed. Good for verifying that specific text appears in the TUI or that navigation works. **Not a substitute for screenshots** when verifying layout, colors, or visual rendering.
104+
105+
```bash
106+
# 1. Detached tmux session (no visible window)
107+
tmux new-session -d -s tui-test -x 200 -y 55
108+
109+
# 2. Launch the TUI
110+
tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter
111+
sleep 25
112+
113+
# 3. Capture the screen as plain text
114+
SCREEN=$(tmux capture-pane -t tui-test -p)
115+
116+
# 4. Assert on content
117+
echo "$SCREEN" | grep -q "TEST EXECUTION" || echo "FAIL: header not found"
118+
echo "$SCREEN" | grep -q "Environment ready" || echo "FAIL: environment didn't start"
119+
120+
# 5. Navigate and capture again
121+
tmux send-keys -t tui-test j
122+
sleep 0.5
123+
SCREEN=$(tmux capture-pane -t tui-test -p)
124+
125+
# 6. Cleanup
126+
tmux send-keys -t tui-test q
127+
sleep 1
128+
tmux kill-session -t tui-test
129+
```
130+
131+
### TUI keyboard shortcuts reference
132+
133+
| Key | Action |
134+
| --------- | --------------------------------------- |
135+
| `j` / `k` | Select next/previous test in left panel |
136+
| `g` / `G` | Jump to top/bottom of test list |
137+
| `u` / `d` | Half-page up/down in test list |
138+
| `J` / `K` | Scroll log panel down/up |
139+
| `U` / `D` | Half-page up/down in log panel |
140+
| `y` | Copy all logs |
141+
| `q` | Quit |
142+
143+
## Recommended Dimensions
144+
145+
| Setting | Value | Notes |
146+
| ------------ | ----- | ------------------------------------------ |
147+
| tmux columns | 200 | Wide enough for both TUI panels + detail |
148+
| tmux rows | 55 | Tall enough to see tests + logs |
149+
| Window bounds | {0, 25, 1700, 1100} | Fits 14"/16" MacBook Pro (adjust for your display) |
150+
151+
## Common Gotchas
152+
153+
1. **Timing is critical.** The TUI renders asynchronously. Capturing too early gives an incomplete screen. When in doubt, wait longer.
154+
2. **tmux status bar.** The green bar at the bottom of native screenshots is tmux's status line. Hide it with `tmux set -t tui-test status off` before capturing.
155+
3. **Scrolling.** Content often extends below the visible area of the log panel. Send `J` or `D` keys to scroll down before capturing.
156+
4. **Screen Recording permission.** Native `screencapture -l` fails silently without it. Grant it to Terminal.app in System Settings > Privacy & Security > Screen Recording.
157+
5. **YAML quoting.** When editing config.yaml with commands containing colons or quotes, wrap the entire value in double quotes and escape inner quotes.
158+
6. **Restore config.** Always restore `.tusk/config.yaml` after testing with modified start commands.
159+
7. **tmux targets by session name** (`-t tui-test`), so commands work regardless of which terminal you're focused on. You can keep working while tests run.
160+
8. **Window ID targeting.** When using `screencapture -l`, make sure the Python script finds the Terminal window, not Chrome or another app that may be in front.

internal/agent/prompts/phase_simple_test.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,10 @@ TuskDrift.initialize({
4444
Run tusk_run to replay the trace.
4545
If it fails:
4646

47-
- Run with `debug: true` (keep running it in debug mode until it passes)
48-
- If startup fails in sandbox (for example secret manager bootstrapping), retry with `sandbox_mode: "off"`
49-
- Check for errors in the output or in the logs (in .tusk/logs/). Logs only appear if `debug: true` is set.
47+
- Check the service startup logs for errors (always shown on startup failure)
48+
- If the service starts but a test replay fails, run with `debug: true` to see runtime logs (keep running it in debug mode until it passes)
49+
- If you need more detail from the SDK itself, set `logLevel: "debug"` in the SDK initialization to see SDK-level diagnostics
50+
- If startup fails in sandbox (for example secret manager bootstrapping), retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why sandbox was disabled (e.g., `# sandbox disabled: service requires external secret manager during startup`)
5051
- If you see config-related errors (e.g., "no start command"), run `tusk_validate_config` to check for config issues
5152
- Try to fix issues and retry (max 3 attempts)
5253
- If still failing, ask the user for help

internal/agent/prompts/phase_verify_complex_test.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ Follow the same process as the simple test:
2929
4. Wait 3 seconds for trace to be written
3030
5. Stop the service
3131
6. Run tusk_list to verify trace was recorded
32-
7. Run tusk_run to replay (try with `debug: true` on failure; if startup fails in sandbox, retry with `sandbox_mode: "off"`)
32+
7. Run tusk_run to replay (startup logs are always shown on startup failure; if a test replay fails, retry with `debug: true` for runtime logs; if startup fails in sandbox, retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why)
3333

3434
### Step 3: Save to Cache
3535

internal/agent/prompts/phase_verify_simple_test.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ If no traces appear, this is a verification failure.
3232
Run tusk_run to replay the trace.
3333
If it fails:
3434

35-
- Run with `debug: true` once more
36-
- If startup fails in sandbox, retry with `sandbox_mode: "off"`
35+
- Check the service startup logs for errors (always shown on startup failure)
36+
- If the service starts but a test replay fails, retry with `debug: true` to see runtime logs
37+
- If you need more detail from the SDK, set `logLevel: "debug"` in the SDK initialization
38+
- If startup fails in sandbox, retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why sandbox was disabled
3739
- If still failing, mark as failed
3840

3941
### Step 5: Save to Cache

internal/log/log.go

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
package log
33

44
import (
5+
"context"
56
"io"
67
"log/slog"
78
"os"
@@ -90,15 +91,24 @@ func (l *Logger) process() {
9091

9192
func (l *Logger) handleLogMessage(msg logMessage) {
9293
tuiPtr := l.tuiLogger.Load()
93-
if tuiPtr == nil {
94+
if tuiPtr != nil {
95+
tui := *tuiPtr
96+
switch msg.msgType {
97+
case logTypeService:
98+
tui.LogToService(msg.message)
99+
case logTypeTest:
100+
tui.LogToCurrentTest(msg.testID, msg.message)
101+
}
94102
return
95103
}
96-
tui := *tuiPtr
104+
105+
// No TUI active (print mode). Route through slog so messages respect
106+
// the configured log level and use the standard slog format.
97107
switch msg.msgType {
98108
case logTypeService:
99-
tui.LogToService(msg.message)
109+
slog.Debug(msg.message)
100110
case logTypeTest:
101-
tui.LogToCurrentTest(msg.testID, msg.message)
111+
slog.Debug(msg.message, "testID", msg.testID)
102112
}
103113
}
104114

@@ -261,6 +271,15 @@ func TestLog(testID, msg string) {
261271
}
262272
}
263273

274+
// TestDebug logs a debug-level message to a specific test's log panel.
275+
// Only emits when debug logging is enabled (--debug flag).
276+
func TestDebug(testID, msg string) {
277+
if !slog.Default().Enabled(context.Background(), slog.LevelDebug) {
278+
return
279+
}
280+
TestLog(testID, msg)
281+
}
282+
264283
// TestOrServiceLog tries to log to test, falls back to service if testID is empty
265284
func TestOrServiceLog(testID, msg string) {
266285
if testID != "" {

internal/runner/environment.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,19 @@ func (e *Executor) StartEnvironment() error {
2525
if e.GetEffectiveSandboxMode() == SandboxModeAuto && e.lastServiceSandboxed {
2626
log.ServiceLog("⚠️ Service failed to start in sandbox; retrying once without sandbox...")
2727
_ = e.StopService()
28+
29+
// Write separator so the user can see where the retry begins.
30+
// The in-memory buffer survives StopService; the file path persists
31+
// via serviceLogPath and setupServiceLogging will reopen in append mode.
32+
if e.enableServiceLogs && e.serviceLogPath != "" {
33+
if f, err := os.OpenFile(e.serviceLogPath, os.O_APPEND|os.O_WRONLY, 0o600); err == nil { // #nosec G304
34+
_, _ = f.WriteString("⚠️ Retrying without sandbox...\n")
35+
_ = f.Close()
36+
}
37+
} else if e.startupLogBuffer != nil {
38+
_, _ = e.startupLogBuffer.Write([]byte("⚠️ Retrying without sandbox...\n"))
39+
}
40+
2841
e.sandboxBypass = true
2942
e.lastServiceSandboxed = false
3043

@@ -52,6 +65,11 @@ waitForSDK:
5265
log.ServiceLog("✅ SDK acknowledged")
5366

5467
log.Debug("Environment is ready")
68+
69+
// Discard the in-memory startup buffer now that startup succeeded.
70+
// File-based logging (--enable-service-logs) persists for the full run.
71+
e.DiscardStartupBuffer()
72+
5573
return nil
5674
}
5775

0 commit comments

Comments
 (0)