From 8c755e1660dbb051ee34971bcdc034d012989e90 Mon Sep 17 00:00:00 2001 From: Angela Blake Date: Tue, 30 Jun 2026 17:45:02 -0500 Subject: [PATCH] SEO: fix FAQPage schema to read the question from saved markup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The core/details block stores its summary as a source:"rich-text" attribute, which lives in the saved <summary> markup, not in the <!-- wp:details --> comment. parse_blocks() does not resolve source-based attributes, so $block['attrs']['summary'] is always empty for real, editor-saved blocks — the question came back blank, every item was skipped, and build_faq() returned null. FAQPage JSON-LD has therefore never emitted from real editor content since it shipped (JETPACK-1680). Read the summary from the block's inner HTML instead, reduced to decoded plain text (shared with the answer extraction). Replace the unit-test fixtures that baked the summary into the block comment — which Gutenberg never does — with realistic editor-saved markup, so the regression is actually covered. JETPACK-1793 Co-Authored-By: Claude Opus 4.8 (1M context) --- .../seo/changelog/fix-seo-faq-schema-summary | 4 ++ .../seo/src/class-post-schema-node.php | 37 ++++++++++++- .../seo/tests/php/PostSchemaNodeTest.php | 53 +++++++++++++++++-- .../seo/tests/php/SchemaBuilderTest.php | 4 +- 4 files changed, 90 insertions(+), 8 deletions(-) create mode 100644 projects/packages/seo/changelog/fix-seo-faq-schema-summary diff --git a/projects/packages/seo/changelog/fix-seo-faq-schema-summary b/projects/packages/seo/changelog/fix-seo-faq-schema-summary new file mode 100644 index 00000000000..b7d39db9b35 --- /dev/null +++ b/projects/packages/seo/changelog/fix-seo-faq-schema-summary @@ -0,0 +1,4 @@ +Significance: patch +Type: fixed + +FAQ schema: read the question from the saved `<summary>` markup so FAQPage JSON-LD emits from real editor-saved Details blocks. 
diff --git a/projects/packages/seo/src/class-post-schema-node.php b/projects/packages/seo/src/class-post-schema-node.php index 9145a440d5f..0a6c299aad5 100644 --- a/projects/packages/seo/src/class-post-schema-node.php +++ b/projects/packages/seo/src/class-post-schema-node.php @@ -128,7 +128,7 @@ private static function build_faq( WP_Post $post ) { if ( 'core/details' !== ( $block['blockName'] ?? '' ) ) { continue; } - $question = trim( (string) ( $block['attrs']['summary'] ?? '' ) ); + $question = self::question_from_details_block( $block ); // Render only the inner blocks for the answer. Rendering the whole // core/details block would re-include the <summary> (the question). @@ -136,7 +136,7 @@ private static function build_faq( WP_Post $post ) { foreach ( $block['innerBlocks'] ?? array() as $inner_block ) { $answer_html .= render_block( $inner_block ); } - $answer = trim( wp_strip_all_tags( $answer_html ) ); + $answer = self::to_plain_text( $answer_html ); if ( '' === $question || '' === $answer ) { continue; } @@ -159,4 +159,37 @@ private static function build_faq( WP_Post $post ) { 'mainEntity' => $items, ); } + + /** + * Extract the question text from a `core/details` block's `<summary>`. + * + * The Details block declares `summary` as a `source: "rich-text"` attribute, + * so the value is saved in the `<summary>` markup, not in the + * `<!-- wp:details -->` comment. `parse_blocks()` does not resolve + * source-based attributes (it only returns what's written into the comment), + * so `$block['attrs']['summary']` is always empty for real, editor-saved + * blocks. The summary text does survive in the block's inner HTML, so we read + * it from there instead. + * + * @param array $block A parsed `core/details` block. + * @return string The plain-text question, or '' when the block has no summary. + */ + private static function question_from_details_block( array $block ) { + $inner_html = (string) ( $block['innerHTML'] ?? '' ); + if ( ! 
preg_match( '#<summary[^>]*>(.*?)</summary>#is', $inner_html, $matches ) ) { + return ''; + } + return self::to_plain_text( $matches[1] ); + } + + /** + * Reduce a fragment of post HTML to the plain text used for a schema value: + * tags stripped, entities decoded, surrounding whitespace trimmed. + * + * @param string $html HTML fragment. + * @return string + */ + private static function to_plain_text( $html ) { + return trim( html_entity_decode( wp_strip_all_tags( (string) $html ), ENT_QUOTES, 'UTF-8' ) ); + } } diff --git a/projects/packages/seo/tests/php/PostSchemaNodeTest.php b/projects/packages/seo/tests/php/PostSchemaNodeTest.php index eb9acd71a97..df80da526d2 100644 --- a/projects/packages/seo/tests/php/PostSchemaNodeTest.php +++ b/projects/packages/seo/tests/php/PostSchemaNodeTest.php @@ -131,13 +131,18 @@ public function test_published_page_has_no_default_schema() { } /** - * FAQPage answers are built from a `core/details` block's inner blocks only, - * so the question (the `<summary>`) is not duplicated into the answer text. + * The FAQ question is read from the saved `<summary>` markup, and the answer + * from the inner blocks only — so the question is not duplicated into the + * answer text. The fixture uses realistic, editor-saved markup: the summary + * lives only in `<summary>` (a `source: "rich-text"` attribute), never in the + * `<!-- wp:details -->` comment, which is what `parse_blocks()` actually + * returns. Baking the summary into the comment instead would mask the bug + * this builder has to handle (see JETPACK-1793). */ - public function test_faq_answer_excludes_the_question() { + public function test_faq_question_from_summary_and_answer_excludes_it() { \Jetpack_SEO_Posts::$schema_type = 'faq'; - $content = ''; + $content = ''; $content .= '
What is SEO?'; $content .= '

Search engine optimization.

'; $content .= '
'; @@ -155,6 +160,46 @@ public function test_faq_answer_excludes_the_question() { $this->assertStringNotContainsString( 'What is SEO?', $item['acceptedAnswer']['text'] ); } + /** + * The summary is rich text, so it may carry inline formatting and HTML + * entities. The question must be reduced to decoded plain text, and multiple + * Details blocks each become their own Question entity in document order. + */ + public function test_faq_summary_is_decoded_plain_text_across_multiple_blocks() { + \Jetpack_SEO_Posts::$schema_type = 'faq'; + + $content = ''; + $content .= '
What is SEO & AEO?'; + $content .= '

Optimization for search and answer engines.

'; + $content .= '
'; + $content .= ''; + $content .= '
Is it free?'; + $content .= '

Yes.

'; + $content .= '
'; + + $node = Post_Schema_Node::build( $this->make_post( array( 'post_content' => $content ) ) ); + + $this->assertIsArray( $node ); + $this->assertCount( 2, $node['mainEntity'] ); + $this->assertSame( 'What is SEO & AEO?', $node['mainEntity'][0]['name'] ); + $this->assertSame( 'Is it free?', $node['mainEntity'][1]['name'] ); + } + + /** + * A Details block whose `<summary>` is empty produces no question, so it is + * skipped rather than emitting a Question with a blank name. + */ + public function test_faq_skips_details_block_with_empty_summary() { + \Jetpack_SEO_Posts::$schema_type = 'faq'; + + $content = ''; + $content .= '
'; + $content .= '

An answer with no question.

'; + $content .= '
'; + + $this->assertNull( Post_Schema_Node::build( $this->make_post( array( 'post_content' => $content ) ) ) ); + } + /** * A "faq" override with no `core/details` blocks yields no node, rather than * an empty/invalid FAQPage. diff --git a/projects/packages/seo/tests/php/SchemaBuilderTest.php b/projects/packages/seo/tests/php/SchemaBuilderTest.php index 37fe1f60044..e04a87e8e8c 100644 --- a/projects/packages/seo/tests/php/SchemaBuilderTest.php +++ b/projects/packages/seo/tests/php/SchemaBuilderTest.php @@ -195,7 +195,7 @@ public function test_emits_graph_with_article_for_published_post() { public function test_emits_graph_with_faqpage_for_faq_override() { \Jetpack_SEO_Posts::$schema_type = 'faq'; - $content = ''; + $content = ''; $content .= '
What is SEO?'; $content .= '

Search engine optimization.

'; $content .= '
'; @@ -235,7 +235,7 @@ public function test_faqpage_has_no_publisher_but_graph_has_organization() { $this->set_site_name( 'Acme Co' ); \Jetpack_SEO_Posts::$schema_type = 'faq'; - $content = ''; + $content = ''; $content .= '
What is SEO?'; $content .= '

Search engine optimization.

'; $content .= '
';