From 64af37f786cb44c50a464e921b57b2a8ee17e576 Mon Sep 17 00:00:00 2001 From: Roberto Aranda Date: Fri, 26 Jun 2026 18:26:48 +0200 Subject: [PATCH] Draft: wire selective sync execution to pull-files/pull-db (guessed Reprint interface) --- apps/cli/commands/pull-reprint.ts | 124 +++++++---- apps/cli/commands/tests/pull-reprint.test.ts | 220 ++++++++----------- 2 files changed, 166 insertions(+), 178 deletions(-) diff --git a/apps/cli/commands/pull-reprint.ts b/apps/cli/commands/pull-reprint.ts index 854165f40b..1219098758 100644 --- a/apps/cli/commands/pull-reprint.ts +++ b/apps/cli/commands/pull-reprint.ts @@ -666,9 +666,10 @@ export async function runCommand( } if ( ! hasPullCompletedStage( studioMetadata, 'completed' ) ) { - // Fetch the deferred media/uploads. The selector's media choice is not - // applied yet (see runFullPull) — wired into pull-files in the follow-up. - if ( hasSkippedFiles( studioMetadata.stateDirectory ) ) { + // Fetch the deferred media/uploads unless the user excluded the media + // library. Core, plugins, and themes already came down in the + // essential-files pass; this second pass is just the media library. + if ( ! studioMetadata.skipUploads && hasSkippedFiles( studioMetadata.stateDirectory ) ) { await downloadSkippedFiles( getSiteRuntime( site ), studioMetadata, @@ -995,26 +996,27 @@ function readPullMetadata( metadataPath: string ): PullSessionMetadata | null { } /** - * Run reprint's composite `pull` command: the whole site-clone - * pipeline (preflight → files-pull → db-pull → db-apply → - * flat-docroot → apply-runtime) in a single child process, with - * reprint owning the stage ordering and, when the prior pull already - * completed, resetting its own sub-command state for a delta re-pull - * via prepare_repull(). + * Run the site-clone pipeline as three high-level reprint commands so the + * selective-sync choice maps cleanly onto them: * - * The SQLite target geometry: - * - If preflight exposed the remote `wp-content` (contentDir set), - * the database lands under `rawDirectory + contentDir`, an - * already-mounted host path that flat-docroot later symlinks into - * the flattened site. - * - Otherwise it falls back to `sitePath/wp-content`. + * 1. `pull-files` — files + flattened layout (`--only` restricts to the + * chosen wp-content folders). + * 2. `pull-db` — database download + import. **Skipped entirely** when the + * user excluded the database. + * 3. `apply-runtime` — server config, run last so it picks up the DB + * credentials `pull-db` wrote to state. * - * The flattened site (`--flatten-to`) and runtime output - * (`--output-dir`) directories are mounted up front so the single - * fork can write them onto the host filesystem. `ensurePort` must - * run first so `--new-site-url` points at the local server. + * DRAFT — `pull-files`/`pull-db` are high-level commands Reprint is adding to + * hide the low-level orchestration (files-index/files-pull/flat-docroot and + * db-pull/db-apply). Names, arguments, and which command owns flatten/runtime + * are **guesses** (marked `TODO(reprint pull-files/pull-db)`); reconcile once + * the real interface lands. Optimized for simplicity and host-agnostic reuse: + * Studio just picks files vs db and passes `--only`; the commands own the rest. * - * Advances the pull stage to 'pulled'. + * SQLite target geometry and the `--flatten-to`/`--output-dir` mounts carry + * over from the previous composite-`pull` call. Advances the stage to 'pulled'; + * the three commands are individually resumable/idempotent, so a crash between + * them safely re-runs the lot on the next pass. */ export async function runFullPull( runtime: SiteRuntime, @@ -1028,41 +1030,67 @@ export async function runFullPull( ? `${ metadata.rawDirectory }${ contentDir }/database/.ht.sqlite` : `${ metadata.sitePath }/wp-content/database/.ht.sqlite`; const reprintRuntime = runtime === SITE_RUNTIME_NATIVE_PHP ? 'nginx-fpm' : 'playground-cli'; + const onlyArgs = ( metadata.fileOnlyPaths ?? [] ).map( ( onlyPath ) => `--only=${ onlyPath }` ); + const mounts = [ + { hostPath: metadata.sitePath, vfsPath: metadata.sitePath }, + { hostPath: metadata.runtimeDirectory, vfsPath: metadata.runtimeDirectory }, + ]; + + const runStep = ( progressLabel: string, args: string[] ) => + runReprintCommandUntilComplete( + metadata.stateDirectory, + metadata.rawDirectory, + args, + ( progress ) => logger.reportProgress( progress ), + { progressLabel, mounts, verboseCommands: verbose, runtime } + ); logger.reportStart( LoggerAction.DOWNLOAD_FILES, __( 'Pulling site…' ) ); - await runReprintCommandUntilComplete( - metadata.stateDirectory, - metadata.rawDirectory, - [ - 'pull', + + // 1. Files (+ flattened layout). `--only` restricts to the selected folders. + // TODO(reprint pull-files): confirm command name; whether it owns flat-docroot; + // and how it expresses "defer uploads" (assumed --filter=essential-files + + // the deferred files-sync pass below). + await runStep( __( 'Pulling files' ), [ + 'pull-files', + apiUrl, + `--secret=${ secret }`, + '--filter=essential-files', + ...onlyArgs, + `--flatten-to=${ metadata.sitePath }`, + '--no-adaptive', + `--state-dir=${ metadata.stateDirectory }`, + `--fs-root=${ metadata.rawDirectory }`, + ] ); + + // 2. Database — only when selected. Skipping it leaves the local DB untouched + // on a re-pull (apply-runtime keeps the credentials already in state). + // TODO(reprint pull-db): confirm command name + target/rewrite args. + if ( ! metadata.skipDatabase ) { + await runStep( __( 'Pulling database' ), [ + 'pull-db', apiUrl, `--secret=${ secret }`, - '--filter=essential-files', '--target-engine=sqlite', `--target-sqlite-path=${ sqlitePath }`, `--new-site-url=${ metadata.localUrl! }`, - `--flatten-to=${ metadata.sitePath }`, - `--runtime=${ reprintRuntime }`, - '--start-runtime=none', - `--output-dir=${ metadata.runtimeDirectory }`, '--no-adaptive', `--state-dir=${ metadata.stateDirectory }`, `--fs-root=${ metadata.rawDirectory }`, - // NOTE: the interactive selector / `--only` / `--skip-*` choices are - // captured in metadata but NOT applied yet — this is a full pull. The - // selection is wired into `pull-files`/`pull-db` in the follow-up PR. - ], - ( progress ) => logger.reportProgress( progress ), - { - progressLabel: __( 'Pulling site' ), - mounts: [ - { hostPath: metadata.sitePath, vfsPath: metadata.sitePath }, - { hostPath: metadata.runtimeDirectory, vfsPath: metadata.runtimeDirectory }, - ], - verboseCommands: verbose, - runtime, - } - ); + ] ); + } + + // 3. Runtime config — last, so it embeds the DB credentials pull-db wrote. + await runStep( __( 'Preparing runtime' ), [ + 'apply-runtime', + '-', + `--runtime=${ reprintRuntime }`, + `--output-dir=${ metadata.runtimeDirectory }`, + `--flat-document-root=${ metadata.sitePath }`, + '--no-adaptive', + `--state-dir=${ metadata.stateDirectory }`, + ] ); + logger.reportSuccess( __( 'Site pulled' ) ); recordCompletedStage( metadata, 'pulled' ); } @@ -1418,7 +1446,7 @@ export function getReprintApiUrlForSite( siteUrl: string ): string { } function buildFilesSyncArgs( - metadata: Pick< PullSessionMetadata, 'stateDirectory' | 'rawDirectory' >, + metadata: Pick< PullSessionMetadata, 'stateDirectory' | 'rawDirectory' | 'fileOnlyPaths' >, apiUrl: string, secret: string, extraArgs: string[] = [] @@ -1428,6 +1456,10 @@ function buildFilesSyncArgs( apiUrl, `--secret=${ secret }`, ...extraArgs, + // Carry the same `--only` set as pull-files so the deferred pass stays in + // scope (files-sync's index is a union keyed by a fingerprint of the + // prefixes and refuses to resume with a different `--only`). + ...( metadata.fileOnlyPaths ?? [] ).map( ( onlyPath ) => `--only=${ onlyPath }` ), // Per-batch ceiling — one sub-process yields after 30 s and the // client reconnects to continue. Not a total-time budget; a slow // or high-latency sync just makes more round-trips. Kept well diff --git a/apps/cli/commands/tests/pull-reprint.test.ts b/apps/cli/commands/tests/pull-reprint.test.ts index d08325c144..8bd4c17fd2 100644 --- a/apps/cli/commands/tests/pull-reprint.test.ts +++ b/apps/cli/commands/tests/pull-reprint.test.ts @@ -185,55 +185,8 @@ describe( 'CLI: studio pull-reprint single pull phase', () => { vi.restoreAllMocks(); } ); - it( 'does not apply the selection yet — no --no-db/--only even when set in metadata (inert menu)', async () => { - const technicalSiteDirectory = fs.mkdtempSync( - path.join( os.tmpdir(), 'studio-import-pull-inert-' ) - ); - const stateDirectory = path.join( technicalSiteDirectory, 'state' ); - const rawDirectory = path.join( technicalSiteDirectory, 'raw' ); - fs.mkdirSync( stateDirectory, { recursive: true } ); - fs.mkdirSync( rawDirectory, { recursive: true } ); - fs.writeFileSync( - path.join( stateDirectory, '.import-state.json' ), - JSON.stringify( { preflight: { data: {} } } ) - ); - - const reprint = vi - .spyOn( migrationClient, 'runReprintCommandUntilComplete' ) - .mockResolvedValue( { stdout: '{"ok":true}', stderr: '', exitCode: 0 } ); - - await runFullPull( - SITE_RUNTIME_PLAYGROUND, - { - version: 1, - normalizedUrl: 'https://example.com/', - siteName: 'example', - sitePath: path.join( technicalSiteDirectory, 'site' ), - technicalSiteDirectory, - rawDirectory, - stateDirectory, - runtimeDirectory: path.join( technicalSiteDirectory, 'runtime' ), - runtimeBlueprintPath: path.join( technicalSiteDirectory, 'runtime', 'blueprint.json' ), - stage: 'initialized', - localUrl: 'http://localhost:8881', - // Selection captured in metadata, but the pull must ignore it for now. - skipDatabase: true, - skipUploads: true, - fileOnlyPaths: [ ':wp-plugins:', '/srv/htdocs/wp-content/plugins/akismet' ], - } as never, - 'https://example.com/?reprint-api', - 'hmac-secret', - false - ); - - const passedArgs = reprint.mock.calls[ 0 ][ 2 ] as string[]; - expect( passedArgs ).not.toContain( '--no-db' ); - expect( passedArgs.some( ( a ) => a.startsWith( '--only' ) ) ).toBe( false ); - - fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); - } ); - - it( 'runs one reprint pull with sqlite under the content dir, mounts the site + runtime, and advances the stage', async () => { + // Shared fixture for the runFullPull pipeline tests. + function setUpPull( extraMetadata: Record< string, unknown > = {}, contentDir?: string ) { const technicalSiteDirectory = fs.mkdtempSync( path.join( os.tmpdir(), 'studio-import-pull-' ) ); @@ -243,35 +196,19 @@ describe( 'CLI: studio pull-reprint single pull phase', () => { const runtimeDirectory = path.join( technicalSiteDirectory, 'runtime' ); fs.mkdirSync( stateDirectory, { recursive: true } ); fs.mkdirSync( rawDirectory, { recursive: true } ); - - // Preflight reported the remote site's wp-content path at - // database.wp.paths_urls.content_dir; the pull's db-apply stage targets - // an sqlite file under rawDirectory + that path so flat-docroot can - // symlink it into the flattened site. fs.writeFileSync( path.join( stateDirectory, '.import-state.json' ), JSON.stringify( { preflight: { - data: { - database: { - wp: { - paths_urls: { - content_dir: '/srv/htdocs/wp-content', - }, - }, - }, - }, + data: contentDir ? { database: { wp: { paths_urls: { content_dir: contentDir } } } } : {}, }, } ) ); - const reprint = vi .spyOn( migrationClient, 'runReprintCommandUntilComplete' ) .mockResolvedValue( { stdout: '{"ok":true}', stderr: '', exitCode: 0 } ); - const metadata = { version: 1, - importKey: 'abc', normalizedUrl: 'https://example.com/', siteName: 'example', sitePath, @@ -283,7 +220,25 @@ describe( 'CLI: studio pull-reprint single pull phase', () => { stage: 'initialized', localUrl: 'http://localhost:8881', remoteSiteUrl: 'https://example.com', + ...extraMetadata, } as never; + return { + technicalSiteDirectory, + stateDirectory, + rawDirectory, + sitePath, + runtimeDirectory, + reprint, + metadata, + }; + } + + const commandsOf = ( reprint: { mock: { calls: unknown[][] } } ) => + reprint.mock.calls.map( ( call ) => ( call[ 2 ] as string[] )[ 0 ] ); + + it( 'runs pull-files → pull-db → apply-runtime with the sqlite target, mounts, and advances the stage', async () => { + const { technicalSiteDirectory, rawDirectory, sitePath, runtimeDirectory, reprint, metadata } = + setUpPull( {}, '/srv/htdocs/wp-content' ); await runFullPull( SITE_RUNTIME_PLAYGROUND, @@ -293,34 +248,31 @@ describe( 'CLI: studio pull-reprint single pull phase', () => { false ); - expect( reprint ).toHaveBeenCalledTimes( 1 ); - const [ passedState, passedRaw, passedArgs, , passedOptions ] = reprint.mock.calls[ 0 ]; - expect( passedState ).toBe( stateDirectory ); - expect( passedRaw ).toBe( rawDirectory ); - expect( passedArgs ).toEqual( [ - 'pull', - 'https://example.com/?reprint-api', - '--secret=hmac-secret', - '--filter=essential-files', - '--target-engine=sqlite', - `--target-sqlite-path=${ rawDirectory }/srv/htdocs/wp-content/database/.ht.sqlite`, - '--new-site-url=http://localhost:8881', - `--flatten-to=${ sitePath }`, - '--runtime=playground-cli', - '--start-runtime=none', - `--output-dir=${ runtimeDirectory }`, - '--no-adaptive', - `--state-dir=${ stateDirectory }`, - `--fs-root=${ rawDirectory }`, - ] ); - // The flattened site and runtime output dirs are mounted up front so - // the single fork can write them to the host filesystem. - expect( passedOptions?.mounts ).toEqual( [ - { hostPath: sitePath, vfsPath: sitePath }, - { hostPath: runtimeDirectory, vfsPath: runtimeDirectory }, - ] ); + expect( reprint ).toHaveBeenCalledTimes( 3 ); + expect( commandsOf( reprint ) ).toEqual( [ 'pull-files', 'pull-db', 'apply-runtime' ] ); + + const [ filesArgs, dbArgs, runtimeArgs ] = reprint.mock.calls.map( + ( c ) => c[ 2 ] as string[] + ); + expect( filesArgs ).toContain( '--filter=essential-files' ); + expect( filesArgs ).toContain( `--flatten-to=${ sitePath }` ); + expect( filesArgs.some( ( a ) => a.startsWith( '--only' ) ) ).toBe( false ); + expect( dbArgs ).toContain( '--target-engine=sqlite' ); + expect( dbArgs ).toContain( + `--target-sqlite-path=${ rawDirectory }/srv/htdocs/wp-content/database/.ht.sqlite` + ); + expect( dbArgs ).toContain( '--new-site-url=http://localhost:8881' ); + expect( runtimeArgs ).toContain( '--runtime=playground-cli' ); + expect( runtimeArgs ).toContain( `--flat-document-root=${ sitePath }` ); + + // Every command mounts the flattened site + runtime output dirs. + for ( const call of reprint.mock.calls ) { + expect( ( call[ 4 ] as { mounts?: unknown } )?.mounts ).toEqual( [ + { hostPath: sitePath, vfsPath: sitePath }, + { hostPath: runtimeDirectory, vfsPath: runtimeDirectory }, + ] ); + } - // Stage is bumped + persisted so a resumed run skips the pull. const persisted = JSON.parse( fs.readFileSync( path.join( technicalSiteDirectory, 'pull.json' ), 'utf-8' ) ); @@ -329,41 +281,30 @@ describe( 'CLI: studio pull-reprint single pull phase', () => { fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); } ); - it( 'falls back to the flattened wp-content sqlite path when preflight exposes no content dir', async () => { - const technicalSiteDirectory = fs.mkdtempSync( - path.join( os.tmpdir(), 'studio-import-pull-fallback-' ) + it( 'skips pull-db entirely when the database is excluded', async () => { + const { technicalSiteDirectory, reprint, metadata } = setUpPull( + { skipDatabase: true }, + '/srv/htdocs/wp-content' ); - const stateDirectory = path.join( technicalSiteDirectory, 'state' ); - const rawDirectory = path.join( technicalSiteDirectory, 'raw' ); - const sitePath = path.join( technicalSiteDirectory, 'site' ); - const runtimeDirectory = path.join( technicalSiteDirectory, 'runtime' ); - fs.mkdirSync( stateDirectory, { recursive: true } ); - fs.mkdirSync( rawDirectory, { recursive: true } ); - fs.writeFileSync( - path.join( stateDirectory, '.import-state.json' ), - JSON.stringify( { preflight: { data: {} } } ) + await runFullPull( + SITE_RUNTIME_PLAYGROUND, + metadata, + 'https://example.com/?reprint-api', + 'hmac-secret', + false ); - const reprint = vi - .spyOn( migrationClient, 'runReprintCommandUntilComplete' ) - .mockResolvedValue( { stdout: '{"ok":true}', stderr: '', exitCode: 0 } ); + expect( commandsOf( reprint ) ).toEqual( [ 'pull-files', 'apply-runtime' ] ); - const metadata = { - version: 1, - importKey: 'def', - normalizedUrl: 'https://example.com/', - siteName: 'example', - sitePath, - technicalSiteDirectory, - rawDirectory, - stateDirectory, - runtimeDirectory, - runtimeBlueprintPath: path.join( runtimeDirectory, 'blueprint.json' ), - stage: 'initialized', - localUrl: 'http://localhost:8881', - remoteSiteUrl: 'https://example.com', - } as never; + fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); + } ); + + it( 'threads --only into pull-files for a folder-restricted pull', async () => { + const { technicalSiteDirectory, reprint, metadata } = setUpPull( + { fileOnlyPaths: [ ':wp-plugins:', '/srv/htdocs/wp-content/plugins/akismet' ] }, + '/srv/htdocs/wp-content' + ); await runFullPull( SITE_RUNTIME_PLAYGROUND, @@ -373,17 +314,32 @@ describe( 'CLI: studio pull-reprint single pull phase', () => { false ); - const [ , , passedArgs, , passedOptions ] = reprint.mock.calls[ 0 ]; - // With no content dir from preflight, the sqlite target falls back to - // the flattened site's wp-content. - expect( passedArgs ).toContain( + const filesArgs = reprint.mock.calls[ 0 ][ 2 ] as string[]; + expect( filesArgs[ 0 ] ).toBe( 'pull-files' ); + expect( filesArgs ).toContain( '--only=:wp-plugins:' ); + expect( filesArgs ).toContain( '--only=/srv/htdocs/wp-content/plugins/akismet' ); + + fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); + } ); + + it( 'falls back to the flattened wp-content sqlite path when preflight exposes no content dir', async () => { + const { technicalSiteDirectory, sitePath, reprint, metadata } = setUpPull(); + + await runFullPull( + SITE_RUNTIME_PLAYGROUND, + metadata, + 'https://example.com/?reprint-api', + 'hmac-secret', + false + ); + + // With no content dir from preflight, the sqlite target (on pull-db) falls + // back to the flattened site's wp-content. + const dbArgs = reprint.mock.calls[ 1 ][ 2 ] as string[]; + expect( dbArgs[ 0 ] ).toBe( 'pull-db' ); + expect( dbArgs ).toContain( `--target-sqlite-path=${ sitePath }/wp-content/database/.ht.sqlite` ); - // The site + runtime dirs are always mounted for the single fork. - expect( passedOptions?.mounts ).toEqual( [ - { hostPath: sitePath, vfsPath: sitePath }, - { hostPath: runtimeDirectory, vfsPath: runtimeDirectory }, - ] ); fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); } );