diff --git a/apps/cli/commands/pull-reprint.ts b/apps/cli/commands/pull-reprint.ts index 22cdf908eb..f80815e79a 100644 --- a/apps/cli/commands/pull-reprint.ts +++ b/apps/cli/commands/pull-reprint.ts @@ -39,8 +39,15 @@ import { type ReprintProcessResult, runReprintCommandUntilComplete, } from 'cli/lib/pull/migration-client'; +import { + fetchReprintPullTree, + mapCliOnlyToReprint, + selectFreshPullOptions, + selectPullItems, +} from 'cli/lib/pull/reprint-selector'; import { getContentDirFromState, + hasLocalFilesIndex, hasSkippedFiles, readReprintState, writeReprintState, @@ -81,6 +88,23 @@ export const registerCommand = ( yargs: StudioArgv ) => { 'Shared HMAC secret configured in the migration plugin on the remote source' ), } ) + .option( 'only', { + type: 'string', + array: true, + describe: __( + 'Restrict the pull to specific wp-content folders (e.g. plugins/akismet, themes, uploads); repeatable.' + ), + } ) + .option( 'skip-database', { + type: 'boolean', + describe: __( 'Do not pull the database (keeps the local one)' ), + default: false, + } ) + .option( 'skip-uploads', { + type: 'boolean', + describe: __( 'Do not pull the media library (uploads)' ), + default: false, + } ) .option( 'verbose', { type: 'boolean', describe: __( 'Show detailed error information and executed commands' ), @@ -95,7 +119,12 @@ export const registerCommand = ( yargs: StudioArgv ) => { argv.path as string, argv.url as string | undefined, argv.secret as string | undefined, - verbose + verbose, + { + only: argv.only as string[] | undefined, + skipDatabase: argv[ 'skip-database' ] as boolean, + skipUploads: argv[ 'skip-uploads' ] as boolean, + } ); } catch ( error ) { if ( error instanceof PullError ) { @@ -144,12 +173,27 @@ type PullStage = ( typeof pullStageOrder )[ number ]; */ const PULL_METADATA_VERSION = 1; -interface PullProgress { +/** + * Selective-sync choice for one pull attempt (interactive selector or + * `--only`/`--skip-*` flags). 
/**
 * Selective-sync choice for one pull attempt (interactive selector or
 * `--only`/`--skip-*` flags). Lives in the transient `pull.json`: a resume
 * reuses the choice without re-prompting, while a delta re-pull resets it
 * and asks again.
 *
 * Every field is optional so that a `pull.json` written before the
 * selection step ran simply has none of them set.
 */
interface PullSelectionState {
	/** True once the selection step has run, even if nothing was excluded. */
	selectionMade?: boolean;
	/** True when the database was excluded (`--skip-database`, or "Database" unchecked in the selector). */
	skipDatabase?: boolean;
	/** True when the media library was excluded (`--skip-uploads`, or the first-pull uploads toggle unchecked). */
	skipUploads?: boolean;
	/**
	 * reprint `--only` source values restricting the file pull. An empty
	 * array is what the interactive selector records when everything under
	 * wp-content stays checked.
	 */
	fileOnlyPaths?: string[];
}
/**
 * Forget any selective-sync choice recorded on the session metadata.
 *
 * The four selection fields are reset to `undefined` in place; nothing is
 * written to disk here, so callers persist the change themselves.
 */
function clearSelection( metadata: PullSessionMetadata ): void {
	const blankSelection: PullSelectionState = {
		selectionMade: undefined,
		skipDatabase: undefined,
		skipUploads: undefined,
		fileOnlyPaths: undefined,
	};
	Object.assign( metadata, blankSelection );
}
/**
 * Resolve the selective-sync choice (persisted choice, CLI flags, or
 * interactive prompt, in that order) and record it on the metadata +
 * `pull.json`. Returns `false` when the user cancels the prompt.
 *
 * On a first pull only the media library is optional — see the module
 * comment in `reprint-selector.ts` for why `--only`/`--skip-database`
 * cannot work before core has been pulled.
 *
 * @param params.metadata    Session metadata; mutated in place and persisted via `savePullProgress`.
 * @param params.isFirstPull Whether this run must be treated as a first (full) pull.
 * @param params.cli         Raw `--only` / `--skip-database` / `--skip-uploads` values.
 * @param params.apiUrl      Remote API URL, forwarded to `fetchReprintPullTree`.
 * @param params.secret      Shared secret, forwarded to `fetchReprintPullTree`.
 * @param params.verbose     Forwarded to `fetchReprintPullTree`.
 * @returns `true` to continue with the pull, `false` when the interactive
 *          selector returned no selection.
 * @throws LoggerError when `--only` or `--skip-database` is used on a first pull.
 */
async function applySelection( params: {
	metadata: PullSessionMetadata;
	isFirstPull: boolean;
	cli: CliSelectionOptions;
	apiUrl: string;
	secret: string;
	verbose: boolean;
} ): Promise< boolean > {
	const { metadata, isFirstPull, cli, apiUrl, secret, verbose } = params;
	// Marks the selection step as done and writes the choice to `pull.json`.
	const commitSelection = () => {
		metadata.selectionMade = true;
		savePullProgress( metadata );
	};

	// A folder selection persisted for what is now a first pull (the scratch
	// was cleared since it was captured) cannot produce a working site; drop
	// it and choose again.
	// NOTE(review): `fileOnlyPaths !== undefined` is also true for the empty
	// array recorded by an unrestricted interactive selection — confirm that
	// re-prompting in that case is intended.
	if ( isFirstPull && ( metadata.fileOnlyPaths !== undefined || metadata.skipDatabase ) ) {
		clearSelection( metadata );
		savePullProgress( metadata );
	}

	// Nothing to decide when the pull stage already completed or a choice
	// was already committed (resume): CLI flags are not consulted here.
	if ( hasPullCompletedStage( metadata, 'pulled' ) || metadata.selectionMade ) {
		return true;
	}

	// Blank `--only` values are ignored when deciding whether flags drive the run.
	const cliOnly = cli.only?.filter( ( value ) => value.trim().length > 0 ) ?? [];
	const cliDriven = cliOnly.length > 0 || cli.skipDatabase || cli.skipUploads;

	if ( cliDriven ) {
		if ( isFirstPull && ( cliOnly.length > 0 || cli.skipDatabase ) ) {
			throw new LoggerError(
				__(
					"The first pull of a site must download WordPress core and the database, so `--only` and `--skip-database` aren't available yet (`--skip-uploads` is). Run a full pull first; folder-level selection works on subsequent pulls."
				)
			);
		}
		if ( cliOnly.length > 0 ) {
			// NOTE(review): when the state holds no content dir this falls
			// back to '', and `mapCliOnlyToReprint` then turns a relative
			// value such as `themes` into `/themes` — confirm whether this
			// case should be rejected instead.
			const contentDir = getContentDirFromState( metadata.stateDirectory ) ?? '';
			metadata.fileOnlyPaths = mapCliOnlyToReprint( cliOnly, contentDir );
		}
		metadata.skipDatabase = !! cli.skipDatabase;
		metadata.skipUploads = !! cli.skipUploads;
		commitSelection();
		return true;
	}

	// No flags and no terminal to prompt on: proceed without recording a
	// selection, so a later interactive run can still ask.
	if ( ! process.stdin.isTTY ) {
		return true; // non-interactive, no flags → full pull
	}

	// First pull: only the media-library toggle is offered.
	if ( isFirstPull ) {
		const fresh = await selectFreshPullOptions();
		metadata.skipUploads = fresh.skipUploads;
		commitSelection();
		return true;
	}

	// Subsequent pull: build the folder tree from the remote index and prompt.
	const { tree, contentDir } = await fetchReprintPullTree( {
		stateDirectory: metadata.stateDirectory,
		rawDirectory: metadata.rawDirectory,
		apiUrl,
		secret,
		runtime: SITE_RUNTIME_NATIVE_PHP,
		verbose,
	} );
	// Nothing to choose from: record an unrestricted selection and continue.
	if ( tree.length === 0 || ! contentDir ) {
		commitSelection();
		return true;
	}
	const selection = await selectPullItems( tree, contentDir );
	if ( ! selection ) {
		console.log( __( 'Cancelled.' ) );
		return false;
	}
	metadata.fileOnlyPaths = selection.fileOnlyPaths;
	metadata.skipDatabase = selection.skipDatabase;
	commitSelection();
	// Keep reprint's state intact here: the follow-up file pull needs the
	// local index (`.import-index.jsonl`) to run as a delta into the
	// non-empty raw fs-root.
	return true;
}
@@ -705,7 +878,14 @@ function getMetadataPath( technicalSiteDirectory: string ): string { function savePullProgress( metadata: PullSessionMetadata ): void { fs.mkdirSync( metadata.technicalSiteDirectory, { recursive: true } ); const metadataPath = getMetadataPath( metadata.technicalSiteDirectory ); - const progress: PullProgress = { version: metadata.version, stage: metadata.stage }; + const progress: PullProgress = { + version: metadata.version, + stage: metadata.stage, + selectionMade: metadata.selectionMade, + skipDatabase: metadata.skipDatabase, + skipUploads: metadata.skipUploads, + fileOnlyPaths: metadata.fileOnlyPaths, + }; const tempPath = `${ metadataPath }.tmp`; fs.writeFileSync( tempPath, JSON.stringify( progress, null, 2 ) + '\n' ); fs.renameSync( tempPath, metadataPath ); @@ -1060,6 +1240,10 @@ export async function getPullSessionMetadata( site: SiteData ) { const metadata = { version: progress.version, stage: progress.stage, + selectionMade: progress.selectionMade, + skipDatabase: progress.skipDatabase, + skipUploads: progress.skipUploads, + fileOnlyPaths: progress.fileOnlyPaths, sitePath: site.path, localUrl: getSiteUrl( site ), technicalSiteDirectory, diff --git a/apps/cli/commands/tests/pull-reprint.test.ts b/apps/cli/commands/tests/pull-reprint.test.ts index d98695a8fe..3064914881 100644 --- a/apps/cli/commands/tests/pull-reprint.test.ts +++ b/apps/cli/commands/tests/pull-reprint.test.ts @@ -299,6 +299,53 @@ describe( 'CLI: studio pull-reprint single pull phase', () => { fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); } ); + it( 'does not apply the selective-sync choice yet — no --no-db/--only even when set (inert menu)', async () => { + const technicalSiteDirectory = fs.mkdtempSync( + path.join( os.tmpdir(), 'studio-import-pull-inert-' ) + ); + const stateDirectory = path.join( technicalSiteDirectory, 'state' ); + const rawDirectory = path.join( technicalSiteDirectory, 'raw' ); + fs.mkdirSync( stateDirectory, { recursive: 
true } ); + fs.mkdirSync( rawDirectory, { recursive: true } ); + fs.writeFileSync( + path.join( stateDirectory, '.import-state.json' ), + JSON.stringify( { preflight: { data: {} } } ) + ); + + const reprint = vi + .spyOn( migrationClient, 'runReprintCommandUntilComplete' ) + .mockResolvedValue( { stdout: '{"ok":true}', stderr: '', exitCode: 0 } ); + + await runFullPull( + SITE_RUNTIME_PLAYGROUND, + { + version: 1, + sitePath: path.join( technicalSiteDirectory, 'site' ), + technicalSiteDirectory, + rawDirectory, + stateDirectory, + runtimeDirectory: path.join( technicalSiteDirectory, 'runtime' ), + runtimeBlueprintPath: path.join( technicalSiteDirectory, 'runtime', 'blueprint.json' ), + stage: 'initialized', + localUrl: 'http://localhost:8881', + // Selection captured in pull.json, but the pull must ignore it for now. + selectionMade: true, + skipDatabase: true, + skipUploads: true, + fileOnlyPaths: [ ':wp-plugins:', '/srv/htdocs/wp-content/plugins/akismet' ], + } as never, + 'https://example.com/?reprint-api', + 'hmac-secret', + false + ); + + const passedArgs = reprint.mock.calls[ 0 ][ 2 ] as string[]; + expect( passedArgs ).not.toContain( '--no-db' ); + expect( passedArgs.some( ( a ) => a.startsWith( '--only' ) ) ).toBe( false ); + + fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); + } ); + it( 'falls back to the flattened wp-content sqlite path when preflight exposes no content dir', async () => { const technicalSiteDirectory = fs.mkdtempSync( path.join( os.tmpdir(), 'studio-import-pull-fallback-' ) diff --git a/apps/cli/lib/pull/reprint-selector.test.ts b/apps/cli/lib/pull/reprint-selector.test.ts new file mode 100644 index 0000000000..bb6b430d92 --- /dev/null +++ b/apps/cli/lib/pull/reprint-selector.test.ts @@ -0,0 +1,179 @@ +import fs from 'fs'; +import os from 'os'; +import path from 'path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + buildReprintTreeFromIndex, + freshSelectionFromValues, + 
/**
 * Serialize one remote-index record the way the index file stores it: a
 * single JSON object whose `path` (and optional `target`) are base64-encoded
 * UTF-8 strings. `size` and `ctime` are fixed at 0.
 */
function encodeEntry(
	absolutePath: string,
	type: 'file' | 'dir' | 'link' = 'file',
	target?: string
): string {
	const toBase64 = ( text: string ): string => Buffer.from( text, 'utf-8' ).toString( 'base64' );

	const record: Record< string, unknown > = {
		path: toBase64( absolutePath ),
		size: 0,
		ctime: 0,
		type,
	};
	// `target` is only emitted for a non-empty value, and always last.
	if ( target ) {
		record.target = toBase64( target );
	}
	return JSON.stringify( record );
}
buildReprintTreeFromIndex( indexPath, CONTENT_DIR ); + + expect( tree ).toHaveLength( 2 ); + expect( tree[ 0 ] ).toMatchObject( { value: 'database', depth: 0 } ); + expect( tree[ 1 ] ).toMatchObject( { value: 'wp-content', isDirectory: true, depth: 0 } ); + + const topLevel = tree[ 1 ].children ?? []; + expect( topLevel.map( ( n ) => n.value ) ).toEqual( [ 'plugins', 'themes' ] ); + + const plugins = topLevel.find( ( n ) => n.value === 'plugins' )!; + // akismet (real dir) kept; hello.php (file) pruned. + expect( ( plugins.children ?? [] ).map( ( n ) => n.value ) ).toEqual( [ 'plugins/akismet' ] ); + expect( plugins.children![ 0 ].children ?? [] ).toEqual( [] ); // akismet.php pruned + } ); + + it( 'keeps symlinks that point at directories and prunes symlinks that point at files', () => { + fs.writeFileSync( + indexPath, + [ + encodeEntry( + `${ CONTENT_DIR }/plugins/jetpack`, + 'link', + '/wordpress/plugins/jetpack/16.0' + ), + encodeEntry( '/wordpress/plugins/jetpack/16.0/jetpack.php' ), + encodeEntry( `${ CONTENT_DIR }/advanced-cache.php`, 'link' ), // drop-in → pruned + ].join( '\n' ) + ); + + const topLevel = buildReprintTreeFromIndex( indexPath, CONTENT_DIR )[ 1 ].children ?? []; + expect( topLevel.map( ( n ) => n.value ) ).toEqual( [ 'plugins' ] ); + expect( ( topLevel[ 0 ].children ?? 
[] ).map( ( n ) => n.value ) ).toEqual( [ + 'plugins/jetpack', + ] ); + expect( topLevel[ 0 ].children![ 0 ].isDirectory ).toBe( true ); + } ); + + it( 'returns an empty tree when the content dir is unknown or nothing is under it', () => { + fs.writeFileSync( indexPath, encodeEntry( '/srv/htdocs/index.php' ) ); + expect( buildReprintTreeFromIndex( indexPath, null ) ).toEqual( [] ); + expect( buildReprintTreeFromIndex( indexPath, CONTENT_DIR ) ).toEqual( [] ); + } ); +} ); + +describe( 'mapCheckedNodesToSelection', () => { + it( 'maps a full selection to no --only and keeps the database', () => { + const selected = [ checked( 'database', 0 ), checked( 'wp-content', 0 ), checked( 'plugins' ) ]; + expect( mapCheckedNodesToSelection( selected, CONTENT_DIR ) ).toEqual( { + fileOnlyPaths: [], + skipDatabase: false, + hasAnyFile: true, + } ); + } ); + + it( 'flags --no-db when the database is unchecked', () => { + const selected = [ checked( 'wp-content', 0 ), checked( 'plugins' ) ]; + expect( mapCheckedNodesToSelection( selected, CONTENT_DIR ).skipDatabase ).toBe( true ); + } ); + + it( 'maps top-level areas to reprint tokens or absolute paths', () => { + const selected = [ checked( 'database', 0 ), checked( 'plugins' ), checked( 'themes' ) ]; + expect( mapCheckedNodesToSelection( selected, CONTENT_DIR ).fileOnlyPaths ).toEqual( [ + ':wp-plugins:', + `${ CONTENT_DIR }/themes`, + ] ); + } ); + + it( 'collapses a fully-checked directory and keeps a deep partial selection as a path', () => { + expect( + mapCheckedNodesToSelection( + [ checked( 'plugins' ), checked( 'plugins/akismet', 2 ) ], + CONTENT_DIR + ).fileOnlyPaths + ).toEqual( [ ':wp-plugins:' ] ); + + expect( + mapCheckedNodesToSelection( [ checked( 'plugins/akismet', 2 ) ], CONTENT_DIR ).fileOnlyPaths + ).toEqual( [ `${ CONTENT_DIR }/plugins/akismet` ] ); + } ); + + it( 'reports no files selected when only the database is checked', () => { + expect( + mapCheckedNodesToSelection( [ checked( 'database', 0 ) ], 
CONTENT_DIR ).hasAnyFile + ).toBe( false ); + } ); +} ); + +describe( 'mapCliOnlyToReprint', () => { + it( 'maps wp-content-relative paths to tokens or absolute paths', () => { + expect( + mapCliOnlyToReprint( [ 'plugins', 'plugins/akismet', 'themes', 'uploads' ], CONTENT_DIR ) + ).toEqual( [ + ':wp-plugins:', + `${ CONTENT_DIR }/plugins/akismet`, + `${ CONTENT_DIR }/themes`, + ':wp-uploads:', + ] ); + } ); + + it( 'strips a leading wp-content/ and trailing slashes', () => { + expect( mapCliOnlyToReprint( [ 'wp-content/plugins/akismet/' ], CONTENT_DIR ) ).toEqual( [ + `${ CONTENT_DIR }/plugins/akismet`, + ] ); + } ); + + it( 'passes through reprint tokens and absolute paths unchanged', () => { + expect( + mapCliOnlyToReprint( [ ':wp-uploads:', '/wordpress/plugins/akismet' ], CONTENT_DIR ) + ).toEqual( [ ':wp-uploads:', '/wordpress/plugins/akismet' ] ); + } ); +} ); diff --git a/apps/cli/lib/pull/reprint-selector.ts b/apps/cli/lib/pull/reprint-selector.ts new file mode 100644 index 0000000000..68b2b93090 --- /dev/null +++ b/apps/cli/lib/pull/reprint-selector.ts @@ -0,0 +1,327 @@ +/** + * Selective-sync prompts for `pull-reprint`. + * + * A first pull (keyed on `site.importComplete`) only offers a media-library + * toggle: reprint's `--only` is an *include* list that replaces the default + * export roots, so a partial folder selection would drop WordPress core, + * while excluding uploads rides on `--filter=essential-files` and is safe + * anytime. Subsequent pulls — core already in the raw fs-root — offer the + * full wp-content folder tree plus a database toggle. + * + * `--only` accepts directories only (a file or symlink-to-file crashes the + * remote exporter), so the tree is built from directories and symlinks that + * resolve to directories, e.g. wp.com's per-plugin symlinks. 
/**
 * Translate the values checked in the first-pull prompt into pull options.
 * The media library is skipped unless `'uploads'` is among the checked values.
 */
export function freshSelectionFromValues( values: string[] ): FreshPullSelection {
	const keepsUploads = values.some( ( value ) => value === 'uploads' );
	return { skipUploads: ! keepsUploads };
}
/** Append the directory-marker slash and sort each level alphabetically. */
function finalizeNodes( nodes: TreeNode[] ): TreeNode[] {
	for ( const node of nodes ) {
		if ( ! node.name.endsWith( '/' ) ) {
			node.name += '/';
		}
		if ( node.children?.length ) {
			node.children = finalizeNodes( node.children );
		}
	}
	// Sort a copy so the array passed in keeps its order.
	return [ ...nodes ].sort( ( a, b ) => a.name.localeCompare( b.name ) );
}

/**
 * Parse reprint's remote index into the nested wp-content child tree,
 * keeping only directories. A path counts as a directory when it has
 * indexed descendants, is `type:dir`, or is a `link` whose target either
 * has indexed descendants (covers wp.com's per-plugin symlinks) or is
 * itself indexed as `type:dir` (a symlink to an empty directory).
 *
 * Link targets are compared verbatim against indexed paths, so a relative
 * target never matches.
 *
 * @param remoteIndexPath Path to the JSONL index; one JSON object per line
 *                        with base64-encoded `path` and optional `target`.
 * @param contentDir      Absolute remote wp-content path; trailing slashes are ignored.
 * @returns The sorted top-level directory nodes, or `[]` when the index
 *          cannot be read or holds no directory under `contentDir`.
 */
function parseIndexChildren( remoteIndexPath: string, contentDir: string ): TreeNode[] {
	const contentRoot = contentDir.replace( /\/+$/, '' );
	const prefix = contentRoot + '/';

	let raw: string;
	try {
		raw = fs.readFileSync( remoteIndexPath, 'utf-8' );
	} catch {
		// Missing or unreadable index: nothing to offer.
		return [];
	}

	const decode = ( encoded: string ): string => Buffer.from( encoded, 'base64' ).toString( 'utf-8' );

	const entryByPath = new Map< string, { type?: string; target?: string } >();
	// Every proper ancestor of an indexed path, i.e. paths known to have descendants.
	const dirPrefixes = new Set< string >();
	const contentEntries: string[] = [];

	for ( const line of raw.split( '\n' ) ) {
		if ( ! line.trim() ) {
			continue;
		}
		let parsed: unknown;
		try {
			parsed = JSON.parse( line );
		} catch {
			continue;
		}
		// Valid JSON that is not an object (`null`, a number, a string) is
		// as unusable as a malformed line; skip it instead of dereferencing it.
		if ( typeof parsed !== 'object' || parsed === null ) {
			continue;
		}
		const entry = parsed as { path?: unknown; type?: unknown; target?: unknown };
		if ( typeof entry.path !== 'string' || entry.path === '' ) {
			continue;
		}
		const absolutePath = decode( entry.path );
		const target =
			typeof entry.target === 'string' && entry.target ? decode( entry.target ) : undefined;
		entryByPath.set( absolutePath, {
			type: typeof entry.type === 'string' ? entry.type : undefined,
			target,
		} );

		const segments = absolutePath.split( '/' );
		for ( let i = 1; i < segments.length; i++ ) {
			dirPrefixes.add( segments.slice( 0, i ).join( '/' ) );
		}

		if ( absolutePath.startsWith( prefix ) ) {
			contentEntries.push( absolutePath );
		}
	}

	const isDirectory = ( absolutePath: string ): boolean => {
		if ( dirPrefixes.has( absolutePath ) ) {
			return true;
		}
		const entry = entryByPath.get( absolutePath );
		if ( entry?.type === 'dir' ) {
			return true;
		}
		if ( entry?.type !== 'link' || ! entry.target ) {
			return false;
		}
		// A link resolves to a directory when its target has descendants or
		// is itself an indexed (possibly empty) directory.
		return dirPrefixes.has( entry.target ) || entryByPath.get( entry.target )?.type === 'dir';
	};

	const rootChildren: TreeNode[] = [];
	const byPath = new Map< string, TreeNode >();

	for ( const absolutePath of contentEntries ) {
		const relativePath = absolutePath.slice( prefix.length ).replace( /\/+$/, '' );
		if ( ! relativePath ) {
			continue;
		}
		const segments = relativePath.split( '/' );
		let parentChildren = rootChildren;
		let currentRel = '';
		let currentAbs = contentRoot;

		for ( let i = 0; i < segments.length; i++ ) {
			const segment = segments[ i ];
			currentRel = currentRel ? `${ currentRel }/${ segment }` : segment;
			currentAbs = `${ currentAbs }/${ segment }`;
			// The first non-directory segment ends the walk: files (and
			// anything below them) never become nodes.
			if ( ! isDirectory( currentAbs ) ) {
				break;
			}

			let node = byPath.get( currentRel );
			if ( ! node ) {
				node = {
					name: segment,
					value: currentRel,
					isDirectory: true,
					checked: true,
					expanded: false,
					depth: i + 1,
					children: [],
				};
				byPath.set( currentRel, node );
				parentChildren.push( node );
			}
			parentChildren = node.children!;
		}
	}

	return finalizeNodes( rootChildren );
}
/**
 * Map a wp-content-relative node value to a reprint `--only` source value:
 * a path token for the well-known folders in `CONTENT_DIR_TOKENS`, otherwise
 * the absolute path under `contentDir`.
 */
function valueToOnly( value: string, contentDir: string ): string {
	// Own-property lookup only: a folder named like an Object.prototype
	// member (`constructor`, `toString`, …) must map to a path, not to the
	// inherited member.
	const token = Object.prototype.hasOwnProperty.call( CONTENT_DIR_TOKENS, value )
		? CONTENT_DIR_TOKENS[ value ]
		: undefined;
	return token ?? `${ contentDir.replace( /\/+$/, '' ) }/${ value }`;
}

/**
 * Map raw `--only` CLI values (wp-content-relative paths like `plugins/akismet`,
 * or pass-through reprint tokens/absolute paths) to reprint `--only` sources.
 *
 * Values are trimmed and blank ones dropped. A leading `wp-content/` and
 * trailing slashes are stripped. A bare `wp-content` (with or without
 * trailing slashes) maps to the content directory itself.
 *
 * @param values     Raw `--only` values as typed on the command line.
 * @param contentDir Absolute remote wp-content path; trailing slashes are ignored.
 * @returns One reprint source per non-blank input, in input order.
 */
export function mapCliOnlyToReprint( values: string[], contentDir: string ): string[] {
	const contentRoot = contentDir.replace( /\/+$/, '' );

	return values
		.map( ( value ) => value.trim() )
		.filter( ( value ) => value.length > 0 )
		.map( ( value ) => {
			if ( value.startsWith( ':' ) || value.startsWith( '/' ) ) {
				return value; // reprint token or absolute path — pass through
			}
			// `wp-content` on its own names the content dir, not a child
			// folder called "wp-content" inside it.
			if ( contentRoot !== '' && value.replace( /\/+$/, '' ) === 'wp-content' ) {
				return contentRoot;
			}
			const relative = value.replace( /^wp-content\//, '' ).replace( /\/+$/, '' );
			return valueToOnly( relative, contentDir );
		} );
}
/**
 * Reduce the flat list of checked nodes to the reprint flags, keeping each
 * fully-selected directory and dropping its descendants. A checked
 * `wp-content` root means everything is selected and no `--only` is needed.
 *
 * A node is dropped when any of its ancestors is checked, not only its
 * immediate parent, and each resulting `--only` value is emitted once.
 *
 * @param selected   Checked nodes as returned by the tree prompt; only `value` is read.
 * @param contentDir Absolute remote wp-content path used for non-token folders.
 * @returns The `--only` values, whether the database is skipped, and whether
 *          any file node was selected at all.
 */
export function mapCheckedNodesToSelection(
	selected: TreeNode[],
	contentDir: string
): PullSelection {
	const checkedValues = new Set( selected.map( ( node ) => node.value ) );
	const skipDatabase = ! checkedValues.has( 'database' );

	if ( checkedValues.has( 'wp-content' ) ) {
		return { fileOnlyPaths: [], skipDatabase, hasAnyFile: true };
	}

	const fileNodes = selected.filter(
		( node ) => node.value !== 'database' && node.value !== 'wp-content'
	);

	// Walk up the `/`-separated value: `a/b/c` → `a/b` → `a`.
	const hasCheckedAncestor = ( value: string ): boolean => {
		let slash = value.lastIndexOf( '/' );
		while ( slash > 0 ) {
			if ( checkedValues.has( value.slice( 0, slash ) ) ) {
				return true;
			}
			slash = value.lastIndexOf( '/', slash - 1 );
		}
		return false;
	};

	const maximal = fileNodes.filter( ( node ) => ! hasCheckedAncestor( node.value ) );
	// A Set keeps first-seen order while dropping repeated values.
	const fileOnlyPaths = [
		...new Set( maximal.map( ( node ) => valueToOnly( node.value, contentDir ) ) ),
	];

	return {
		fileOnlyPaths,
		skipDatabase,
		hasAnyFile: fileNodes.length > 0,
	};
}
/**
 * Ask which parts of the remote site to refresh.
 *
 * Resolves to `undefined` in two situations: nothing was checked (treated
 * as a cancel), or only the database was checked — a database-only refresh
 * isn't supported yet, so guidance is printed and the caller aborts.
 *
 * @param tree       Selector tree to display.
 * @param contentDir Absolute remote wp-content path used to map folders to `--only` values.
 */
export async function selectPullItems(
	tree: TreeNode[],
	contentDir: string
): Promise< PullSelection | undefined > {
	const promptConfig = {
		message: __( 'Select what to refresh from the remote site' ),
		tree,
	};
	const picked = await treeCheckbox( promptConfig );

	if ( picked.length === 0 ) {
		return undefined;
	}

	const outcome = mapCheckedNodesToSelection( picked, contentDir );
	if ( outcome.hasAnyFile ) {
		return outcome;
	}

	console.log(
		__(
			'Refreshing the database on its own is not supported yet. Select at least one folder to refresh.'
		)
	);
	return undefined;
}
/**
 * True when reprint's local file index (`LOCAL_INDEX_FILE` inside
 * `stateDirectory`) exists as a regular, non-empty file. Callers treat that
 * as the sign that a file sync has populated the raw fs-root, so a
 * `--only`-restricted delta pull is safe. Unlike the durable
 * `site.importComplete` flag, this reflects the actual scratch contents.
 *
 * Never throws: any failure to stat the path (missing file, removed between
 * checks, permission error) is reported as `false`.
 */
export function hasLocalFilesIndex( stateDirectory: string ): boolean {
	const localIndexPath = path.join( stateDirectory, LOCAL_INDEX_FILE );
	try {
		// A single stat call avoids the window between an existence check
		// and the size read; a directory at this path is not an index.
		const stats = fs.statSync( localIndexPath );
		return stats.isFile() && stats.size > 0;
	} catch {
		return false;
	}
}