Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .buildkite/commands/run-e2e-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,33 @@ else
echo 'Installing Playwright browsers...'
npx playwright install

# TEMP(AINFRA-2588 probe): Windows E2E hangs *after* the Playwright suite finishes
# (~26 min) — the runner process never exits and the job runs to the 180-min timeout.
# Snapshot the surviving process tree while it is hung to find the leaked child/handle.
# Runs in the background so it fires even though `npx playwright test` never returns.
probe_pid=''
if [ "$PLATFORM" = "windows" ]; then
	(
		# Killing this subshell would not kill a foreground `sleep` child: the sleep
		# would be orphaned and keep the job's stdout open until it expired. Each sleep
		# therefore runs in the background and is reaped from this TERM trap.
		sleep_pid=''
		trap 'if [ -n "$sleep_pid" ]; then kill "$sleep_pid" 2>/dev/null || true; fi; exit 0' TERM

		# Print the surviving node/Studio/php/electron/Squirrel processes, labelled
		# with the elapsed-time marker passed as $1. Best effort: never fails the probe.
		snapshot() {
			echo "--- :mag: [hang-probe] surviving node/Studio/php processes at ${1}"
			powershell -NoProfile -NonInteractive -Command "Get-CimInstance Win32_Process | Where-Object { \$_.Name -match 'node|Studio|php|electron|Squirrel' } | Select-Object ProcessId,ParentProcessId,Name,CommandLine | Format-Table -AutoSize -Wrap | Out-String -Width 1000" || true
		}

		# Sleep for $1 seconds in a way that a TERM sent to this subshell can interrupt:
		# `wait` returns as soon as a trapped signal arrives, then the trap runs.
		interruptible_sleep() {
			sleep "$1" &
			sleep_pid=$!
			wait "$sleep_pid" || true
			sleep_pid=''
		}

		interruptible_sleep 2400; snapshot '40m'
		interruptible_sleep 1800; snapshot '70m'
	) &
	probe_pid=$!
fi

echo 'Running Playwright tests...'
# Record the suite's exit status instead of letting a failure trip `set -e` here,
# so the daemon logs (~/.studio/daemon/logs) can still be collected for artifact upload.
if npx playwright test; then
	test_exit=0
else
	test_exit=$?
fi

# Stop the background hang-probe if one was started; ignore a probe that already exited.
[ -z "$probe_pid" ] || kill "$probe_pid" 2>/dev/null || true

if [ "$test_exit" -ne 0 ] && [ -d "$HOME/.studio/daemon/logs" ]; then
mkdir -p test-results/daemon-logs
cp -r "$HOME/.studio/daemon/logs/." test-results/daemon-logs/ || true
Expand Down
6 changes: 3 additions & 3 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ e2e_config: &e2e_config
setup: { platform: [], arch: [] }
adjustments:
- with: { platform: mac, arch: arm64 }
# Windows E2E temporarily disabled while AINFRA-2588 investigates Windows E2E hangs in Buildkite.
# See https://linear.app/a8c/issue/AINFRA-2588/investigate-studio-windows-e2e-hangs-in-buildkite
# - with: { platform: windows, arch: x64 }
# TEMP(AINFRA-2588): Windows E2E re-enabled here to verify the closeApp() force-kill teardown
# fix lands the job under the 180-min timeout. Revert to the disabled form before merge.
- with: { platform: windows, arch: x64 }
notify:
- github_commit_status:
context: E2E Tests
Expand Down
32 changes: 31 additions & 1 deletion apps/studio/e2e/e2e-helpers.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { randomUUID } from 'crypto';
import { spawnSync } from 'node:child_process';
import { tmpdir } from 'os';
import path from 'path';
import { findLatestBuild, parseElectronApp } from 'electron-playwright-helpers';
Expand Down Expand Up @@ -80,7 +81,16 @@ export class E2ESession {
await new Promise< void >( ( resolve ) => setTimeout( resolve, 2000 ) );
console.log( 'App closed successfully' );
} catch ( error ) {
console.log( 'Process exit timeout' );
// The graceful quit stalled — a leaky IPC handler or a `site stop --all` child that
// won't exit keeps Electron alive (notably on Windows). Force-kill the process tree so
// the Playwright runner can exit, instead of leaving an orphaned process that blocks the
// run until the CI job timeout.
console.log( 'Process exit timeout; force-killing the app process tree' );
forceKillProcessTree( childProcess );
await Promise.race( [
exitPromise,
new Promise< void >( ( resolve ) => setTimeout( resolve, 5_000 ) ),
] );
} finally {
this.stopCapturingMainProcessLogs();
}
Expand Down Expand Up @@ -223,3 +233,23 @@ export class E2ESession {
return this.mainProcessLogs.join( '' ).trim();
}
}

/**
 * Forcefully terminates the given child process and, on Windows, its whole process tree.
 *
 * Does nothing when the child has no pid. This function must never block indefinitely:
 * it is called precisely because a graceful shutdown already stalled.
 *
 * @param childProcess The spawned process to terminate.
 */
function forceKillProcessTree( childProcess: ChildProcess ): void {
	const { pid } = childProcess;
	if ( ! pid ) {
		return;
	}

	// `child.kill()` only signals the Electron main process; on Windows its renderer and php.exe
	// descendants orphan and keep DLLs locked, so walk the whole tree with `taskkill /T`. Modeled
	// on `killChild` in tools/common/lib/cli-process.ts.
	if ( process.platform === 'win32' ) {
		const result = spawnSync( 'taskkill', [ '/F', '/T', '/PID', String( pid ) ], {
			windowsHide: true,
			stdio: 'ignore',
			// Bound the synchronous call so a wedged taskkill cannot stall the teardown.
			timeout: 10_000,
		} );
		if ( ! result.error && result.status === 0 ) {
			return;
		}
		// taskkill was unavailable, timed out, or reported failure: fall through and at least
		// signal the main process directly rather than leaving it running.
	}

	childProcess.kill( 'SIGKILL' );
}
59 changes: 59 additions & 0 deletions apps/studio/e2e/global-teardown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import { execFileSync } from 'node:child_process';

/**
 * Playwright global teardown for the Studio E2E suite.
 *
 * Studio spawns a long-lived CLI process-manager daemon
 * (`resources/cli/process-manager-daemon.mjs`) that runs each site's PHP servers. On
 * Windows that daemon is orphaned when the Electron app quits and lives on together with
 * its `php-server-child.mjs` / `php.exe` subtree; the Playwright runner keeps an open
 * handle to it and never exits, so the CI job runs into the 180-min timeout (AINFRA-2588).
 * Neither `closeApp()` nor `site stop --all` reaps it. After the whole suite has finished
 * nothing needs the daemon any more, so any survivors are killed here to let the runner exit.
 */
export default function globalTeardown() {
	// Pick the platform-specific reaper, then run it.
	const reapDaemons =
		process.platform === 'win32' ? reapWindowsStudioDaemons : reapUnixStudioDaemons;
	reapDaemons();
}

/**
 * Finds every Windows process whose command line mentions `process-manager-daemon.mjs`
 * and force-kills it together with its descendants.
 *
 * Best effort: any failure to query or kill is swallowed. Both external commands are
 * time-bounded so that this teardown cannot itself hang.
 */
function reapWindowsStudioDaemons(): void {
	const query =
		'Get-CimInstance Win32_Process | ' +
		"Where-Object { $_.CommandLine -match 'process-manager-daemon\\.mjs' } | " +
		'Select-Object -ExpandProperty ProcessId';

	let pids: string[] = [];
	try {
		const out = execFileSync(
			'powershell',
			[ '-NoProfile', '-NonInteractive', '-Command', query ],
			{ encoding: 'utf8', windowsHide: true, timeout: 60_000 }
		);
		// Keep only purely numeric tokens so stray PowerShell output (warnings, banners)
		// is never handed to taskkill as a PID.
		pids = out.split( /\s+/ ).filter( ( token ) => /^\d+$/.test( token ) );
	} catch {
		// No daemon found, powershell unavailable, or the query timed out — nothing to reap.
		return;
	}

	// `taskkill /T` walks the tree, killing the daemon's php-server-child.mjs and php.exe
	// descendants along with it.
	for ( const pid of pids ) {
		try {
			execFileSync( 'taskkill', [ '/F', '/T', '/PID', pid ], {
				stdio: 'ignore',
				windowsHide: true,
				timeout: 10_000,
			} );
			console.log( `[global-teardown] killed lingering Studio daemon tree (pid ${ pid })` );
		} catch {
			// Already exited between the query and the kill, or taskkill failed / timed out.
		}
	}
}

/**
 * Kills any process on macOS/Linux whose full command line matches the Studio
 * process-manager daemon script. These platforms don't exhibit the hang, but reaping
 * here keeps a daemon leak from regressing unnoticed.
 */
function reapUnixStudioDaemons() {
	const daemonPattern = 'process-manager-daemon\\.mjs';
	const pkillArgs = [ '-f', daemonPattern ];

	try {
		execFileSync( 'pkill', pkillArgs, { stdio: 'ignore' } );
	} catch {
		// `pkill` exits non-zero when no process matches; that is the expected quiet case.
	}
}
4 changes: 4 additions & 0 deletions playwright.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ export default defineConfig( {
testDir: './apps/studio/e2e',
snapshotPathTemplate: '{testDir}/__screenshots__/{testFilePath}/{arg}{ext}',

// Reap Studio's orphaned CLI process-manager daemon after the suite so the runner can
// exit instead of hanging to the CI timeout on Windows (AINFRA-2588).
globalTeardown: './apps/studio/e2e/global-teardown.ts',

// The app only allows a single instance to be running at a time, so we can
// only run one test at a time.
workers: 1,
Expand Down
Loading