From 0fd8341c777a847d13073b96d7f3850c65150465 Mon Sep 17 00:00:00 2001 From: Dimitris Massaad Date: Sun, 29 Jun 2025 20:36:26 +0300 Subject: [PATCH] wp:author entries can be multiline --- phpunit/data/valid-wxr-1.1.xml | 8 ++++++++ phpunit/tests/parser.php | 12 ++++++++++++ src/parsers/class-wxr-parser-regex.php | 23 +++++++++++++---------- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/phpunit/data/valid-wxr-1.1.xml b/phpunit/data/valid-wxr-1.1.xml index f389741f..12c962fe 100644 --- a/phpunit/data/valid-wxr-1.1.xml +++ b/phpunit/data/valid-wxr-1.1.xml @@ -36,6 +36,14 @@ http://localhost/ 2johnjohndoe@example.org + + 3 + jane + janedoe@example.org + + + + 3alpha 22clippable diff --git a/phpunit/tests/parser.php b/phpunit/tests/parser.php index 3464a5e3..d55ca250 100644 --- a/phpunit/tests/parser.php +++ b/phpunit/tests/parser.php @@ -80,6 +80,18 @@ public function test_wxr_version_1_1() { $result['authors']['john'], $message ); + $this->assertEqualSetsWithIndex( + array( + 'author_id' => 3, + 'author_login' => 'jane', + 'author_email' => 'janedoe@example.org', + 'author_display_name' => 'Jane Doe', + 'author_first_name' => 'Jane', + 'author_last_name' => 'Doe', + ), + $result['authors']['jane'], + $message + ); $this->assertEqualSetsWithIndex( array( 'term_id' => 3, diff --git a/src/parsers/class-wxr-parser-regex.php b/src/parsers/class-wxr-parser-regex.php index 4ac1a96d..de930070 100644 --- a/src/parsers/class-wxr-parser-regex.php +++ b/src/parsers/class-wxr-parser-regex.php @@ -34,6 +34,7 @@ public function parse( $file ) { 'wp:category' => array( 'categories', array( $this, 'process_category' ) ), 'wp:tag' => array( 'tags', array( $this, 'process_tag' ) ), 'wp:term' => array( 'terms', array( $this, 'process_term' ) ), + 'wp:author' => array( 'authors', array( $this, 'process_author' ) ), ); $fp = $this->fopen( $file, 'r' ); @@ -60,20 +61,17 @@ public function parse( $file ) { $this->base_blog_url = $this->base_url; } - if ( false !== strpos( 
$importline, '<wp:author>' ) ) { - preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author ); - $a = $this->process_author( $author[1] ); - $this->authors[ $a['author_login'] ] = $a; - continue; - } - foreach ( $multiline_tags as $tag => $handler ) { // Handle multi-line tags on a singular line $pos = strpos( $importline, "<$tag>" ); $pos_closing = strpos( $importline, "</$tag>" ); if ( preg_match( '|<' . $tag . '>(.*?)</' . $tag . '>|is', $importline, $matches ) ) { - $this->{$handler[0]}[] = call_user_func( $handler[1], $matches[1] ); - + $result = call_user_func( $handler[1], $matches[1] ); + if ( 'wp:author' === $tag && isset( $result['author_login'] ) ) { + $this->authors[ $result['author_login'] ] = $result; + } else { + $this->{$handler[0]}[] = $result; + } } elseif ( false !== $pos ) { // Take note of any content after the opening tag $multiline_content = trim( substr( $importline, $pos + strlen( $tag ) + 2 ) ); @@ -86,7 +84,12 @@ public function parse( $file ) { $in_multiline = false; $multiline_content .= trim( substr( $importline, 0, $pos_closing ) ); - $this->{$handler[0]}[] = call_user_func( $handler[1], $multiline_content ); + $result = call_user_func( $handler[1], $multiline_content ); + if ( 'wp:author' === $tag && isset( $result['author_login'] ) ) { + $this->authors[ $result['author_login'] ] = $result; + } else { + $this->{$handler[0]}[] = $result; + } } }