Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 78 additions & 46 deletions apps/cli/commands/pull-reprint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -666,9 +666,10 @@ export async function runCommand(
}

if ( ! hasPullCompletedStage( studioMetadata, 'completed' ) ) {
// Fetch the deferred media/uploads. The selector's media choice is not
// applied yet (see runFullPull) — wired into pull-files in the follow-up.
if ( hasSkippedFiles( studioMetadata.stateDirectory ) ) {
// Fetch the deferred media/uploads unless the user excluded the media
// library. Core, plugins, and themes already came down in the
// essential-files pass; this second pass is just the media library.
if ( ! studioMetadata.skipUploads && hasSkippedFiles( studioMetadata.stateDirectory ) ) {
await downloadSkippedFiles(
getSiteRuntime( site ),
studioMetadata,
Expand Down Expand Up @@ -995,26 +996,27 @@ function readPullMetadata( metadataPath: string ): PullSessionMetadata | null {
}

/**
* Run reprint's composite `pull` command: the whole site-clone
* pipeline (preflight → files-pull → db-pull → db-apply →
* flat-docroot → apply-runtime) in a single child process, with
* reprint owning the stage ordering and, when the prior pull already
* completed, resetting its own sub-command state for a delta re-pull
* via prepare_repull().
* Run the site-clone pipeline as three high-level reprint commands so the
* selective-sync choice maps cleanly onto them:
*
* The SQLite target geometry:
* - If preflight exposed the remote `wp-content` (contentDir set),
* the database lands under `rawDirectory + contentDir`, an
* already-mounted host path that flat-docroot later symlinks into
* the flattened site.
* - Otherwise it falls back to `sitePath/wp-content`.
* 1. `pull-files` — files + flattened layout (`--only` restricts to the
* chosen wp-content folders).
* 2. `pull-db` — database download + import. **Skipped entirely** when the
* user excluded the database.
* 3. `apply-runtime` — server config, run last so it picks up the DB
* credentials `pull-db` wrote to state.
*
* The flattened site (`--flatten-to`) and runtime output
* (`--output-dir`) directories are mounted up front so the single
* fork can write them onto the host filesystem. `ensurePort` must
* run first so `--new-site-url` points at the local server.
* DRAFT — `pull-files`/`pull-db` are high-level commands Reprint is adding to
* hide the low-level orchestration (files-index/files-pull/flat-docroot and
* db-pull/db-apply). Names, arguments, and which command owns flatten/runtime
* are **guesses** (marked `TODO(reprint pull-files/pull-db)`); reconcile once
* the real interface lands. Optimized for simplicity and host-agnostic reuse:
* Studio just picks files vs db and passes `--only`; the commands own the rest.
*
* Advances the pull stage to 'pulled'.
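* SQLite target geometry: when preflight exposed the remote `wp-content`
* (`contentDir` set), the database lands at
* `rawDirectory + contentDir + /database/.ht.sqlite`, an already-mounted host
* path; otherwise it falls back to `sitePath/wp-content/database/.ht.sqlite`.
* `sitePath` (the `--flatten-to` target) and `runtimeDirectory` (the
* `--output-dir` target) are mounted for every step so the commands can write
* the flattened site and the runtime output onto the host filesystem.
*
* Advances the stage to 'pulled'. The three commands are individually
* resumable/idempotent, so a crash between them safely re-runs the lot on the
* next pass.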
*/
export async function runFullPull(
runtime: SiteRuntime,
Expand All @@ -1028,41 +1030,67 @@ export async function runFullPull(
? `${ metadata.rawDirectory }${ contentDir }/database/.ht.sqlite`
: `${ metadata.sitePath }/wp-content/database/.ht.sqlite`;
const reprintRuntime = runtime === SITE_RUNTIME_NATIVE_PHP ? 'nginx-fpm' : 'playground-cli';
const onlyArgs = ( metadata.fileOnlyPaths ?? [] ).map( ( onlyPath ) => `--only=${ onlyPath }` );
const mounts = [
{ hostPath: metadata.sitePath, vfsPath: metadata.sitePath },
{ hostPath: metadata.runtimeDirectory, vfsPath: metadata.runtimeDirectory },
];

const runStep = ( progressLabel: string, args: string[] ) =>
runReprintCommandUntilComplete(
metadata.stateDirectory,
metadata.rawDirectory,
args,
( progress ) => logger.reportProgress( progress ),
{ progressLabel, mounts, verboseCommands: verbose, runtime }
);

logger.reportStart( LoggerAction.DOWNLOAD_FILES, __( 'Pulling site…' ) );
await runReprintCommandUntilComplete(
metadata.stateDirectory,
metadata.rawDirectory,
[
'pull',

// 1. Files (+ flattened layout). `--only` restricts to the selected folders.
// TODO(reprint pull-files): confirm command name; whether it owns flat-docroot;
// and how it expresses "defer uploads" (assumed --filter=essential-files +
// the deferred files-sync pass below).
await runStep( __( 'Pulling files' ), [
'pull-files',
apiUrl,
`--secret=${ secret }`,
'--filter=essential-files',
...onlyArgs,
`--flatten-to=${ metadata.sitePath }`,
'--no-adaptive',
`--state-dir=${ metadata.stateDirectory }`,
`--fs-root=${ metadata.rawDirectory }`,
] );

// 2. Database — only when selected. Skipping it leaves the local DB untouched
// on a re-pull (apply-runtime keeps the credentials already in state).
// TODO(reprint pull-db): confirm command name + target/rewrite args.
if ( ! metadata.skipDatabase ) {
await runStep( __( 'Pulling database' ), [
'pull-db',
apiUrl,
`--secret=${ secret }`,
'--filter=essential-files',
'--target-engine=sqlite',
`--target-sqlite-path=${ sqlitePath }`,
`--new-site-url=${ metadata.localUrl! }`,
`--flatten-to=${ metadata.sitePath }`,
`--runtime=${ reprintRuntime }`,
'--start-runtime=none',
`--output-dir=${ metadata.runtimeDirectory }`,
'--no-adaptive',
`--state-dir=${ metadata.stateDirectory }`,
`--fs-root=${ metadata.rawDirectory }`,
// NOTE: the interactive selector / `--only` / `--skip-*` choices are
// captured in metadata but NOT applied yet — this is a full pull. The
// selection is wired into `pull-files`/`pull-db` in the follow-up PR.
],
( progress ) => logger.reportProgress( progress ),
{
progressLabel: __( 'Pulling site' ),
mounts: [
{ hostPath: metadata.sitePath, vfsPath: metadata.sitePath },
{ hostPath: metadata.runtimeDirectory, vfsPath: metadata.runtimeDirectory },
],
verboseCommands: verbose,
runtime,
}
);
] );
}

// 3. Runtime config — last, so it embeds the DB credentials pull-db wrote.
await runStep( __( 'Preparing runtime' ), [
'apply-runtime',
'-',
`--runtime=${ reprintRuntime }`,
`--output-dir=${ metadata.runtimeDirectory }`,
`--flat-document-root=${ metadata.sitePath }`,
'--no-adaptive',
`--state-dir=${ metadata.stateDirectory }`,
] );

logger.reportSuccess( __( 'Site pulled' ) );
recordCompletedStage( metadata, 'pulled' );
}
Expand Down Expand Up @@ -1418,7 +1446,7 @@ export function getReprintApiUrlForSite( siteUrl: string ): string {
}

function buildFilesSyncArgs(
metadata: Pick< PullSessionMetadata, 'stateDirectory' | 'rawDirectory' >,
metadata: Pick< PullSessionMetadata, 'stateDirectory' | 'rawDirectory' | 'fileOnlyPaths' >,
apiUrl: string,
secret: string,
extraArgs: string[] = []
Expand All @@ -1428,6 +1456,10 @@ function buildFilesSyncArgs(
apiUrl,
`--secret=${ secret }`,
...extraArgs,
// Carry the same `--only` set as pull-files so the deferred pass stays in
// scope (files-sync's index is a union keyed by a fingerprint of the
// prefixes and refuses to resume with a different `--only`).
...( metadata.fileOnlyPaths ?? [] ).map( ( onlyPath ) => `--only=${ onlyPath }` ),
// Per-batch ceiling — one sub-process yields after 30 s and the
// client reconnects to continue. Not a total-time budget; a slow
// or high-latency sync just makes more round-trips. Kept well
Expand Down
Loading