@@ -46,22 +46,6 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
4646 const result : ParsedNode [ ] = [ ]
4747 let currentTextNode : TextNode | null = null
4848
49- // Fallback: markdown-it sometimes mis-tokenizes code_inline around CJK text
50- // yielding code_inline nodes that contain non-ASCII prose (e.g. "方法会根据").
51- // When detected, prefer re-parsing from the original raw string to rebuild
52- // inline_code spans and basic strong emphasis segments in a stable way.
53- const hasSuspiciousCodeInline = tokens . some ( t => t . type === 'code_inline' && / [ ^ \x00 - \x7F ] / . test ( String ( t . content ?? '' ) ) )
54- const hasBackticksInRaw = typeof raw === 'string' && / ` / . test ( raw )
55- const codeInlineCount = tokens . reduce ( ( n , t ) => n + ( t . type === 'code_inline' ? 1 : 0 ) , 0 )
56- const rawBacktickCount = typeof raw === 'string' ? ( ( raw . match ( / ` / g) || [ ] ) . length ) : 0
57- // When backtick count is even, expected inline-code spans ~= backtickCount/2.
58- const expectedSpans = rawBacktickCount % 2 === 0 ? rawBacktickCount / 2 : 0
59- const mismatchedSpanCount = expectedSpans > 0 && codeInlineCount !== expectedSpans
60- const hasMathInlineUsingBackticks = tokens . some ( t => t . type === 'math_inline' && / ` / . test ( String ( ( t as any ) . raw ?? t . content ?? '' ) ) )
61- if ( hasBackticksInRaw && ( hasSuspiciousCodeInline || mismatchedSpanCount || hasMathInlineUsingBackticks ) ) {
62- return parseFromRawWithCodeAndStrong ( String ( raw ?? '' ) )
63- }
64-
6549 let i = 0
6650 // Note: strong-token normalization and list-item normalization are
6751 // applied during markdown-it parsing via core rules (plugins that
@@ -190,50 +174,58 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
190174 if ( ! / ` [ ^ ` ] * / . test ( content ) )
191175 return false
192176
177+ // Close any current text node and handle inline code
178+ resetCurrentTextNode ( )
193179 const code_start = content . indexOf ( '`' )
194180 const code_end = content . indexOf ( '`' , code_start + 1 )
195-
196- // If we don't have a closing backtick within this token, don't emit a partial
197- // inline_code node. Instead, merge the rest of inline tokens into a single
198- // string and re-run parsing so the code span is handled atomically.
199- if ( code_end === - 1 ) {
200- let merged = content
201- for ( let j = i + 1 ; j < tokens . length ; j ++ )
202- merged += String ( ( tokens [ j ] . content ?? '' ) + ( tokens [ j ] . markup ?? '' ) )
203-
204- // Consume to the end since we've merged remaining tokens
205- i = tokens . length - 1
206- handleToken ( { type : 'text' , content : merged , raw : merged } as unknown as MarkdownToken )
207- i ++
208- return true
209- }
210-
211- // Close any current text node and handle the text before the code span
212- resetCurrentTextNode ( )
213- const beforeText = content . slice ( 0 , code_start )
214- const codeContent = content . slice ( code_start + 1 , code_end )
215- const after = content . slice ( code_end + 1 )
216-
217- if ( beforeText ) {
218- // Try to parse emphasis/strong inside the pre-code fragment, without
219- // advancing the outer token index `i` permanently.
220- const handled = handleEmphasisAndStrikethrough ( beforeText , _token )
221- if ( ! handled )
222- pushText ( beforeText , beforeText )
223- else
181+ const _text = content . slice ( 0 , code_start )
182+ const codeContent = code_end === - 1 ? content . slice ( code_start ) : content . slice ( code_start , code_end )
183+ const after = code_end === - 1 ? '' : content . slice ( code_end + 1 )
184+ if ( _text ) {
185+ // Try to re-run emphasis/strong parsing on the fragment before the code span
186+ // but avoid mutating the outer token index `i` (handlers sometimes increment it).
187+ const handled = handleEmphasisAndStrikethrough ( _text , _token )
188+ // restore index so we don't skip tokens in the outer loop
189+ if ( ! handled ) {
190+ pushText ( _text , _text )
191+ }
192+ else {
224193 i --
194+ }
225195 }
226196
197+ const code = codeContent . replace ( / ` / g, '' )
227198 pushParsed ( {
228199 type : 'inline_code' ,
229- code : codeContent ,
230- raw : String ( codeContent ?? '' ) ,
200+ code,
201+ raw : String ( code ?? '' ) ,
231202 } as ParsedNode )
232203
204+ // afterCode 可能也存在很多情况包括多个 code,我们递归处理 --- IGNORE ---
233205 if ( after ) {
234- handleToken ( { type : 'text' , content : after , raw : after } as unknown as MarkdownToken )
206+ handleToken ( {
207+ type : 'text' ,
208+ content : after ,
209+ raw : String ( after ?? '' ) ,
210+ } )
235211 i --
236212 }
213+ else if ( code_end === - 1 ) {
214+ // 要把下一个 token 也合并进来,把类型变成 text
215+ const nextToken = tokens [ i + 1 ]
216+ if ( nextToken ) {
217+ let fixedAfter = after
218+ for ( let j = i + 1 ; j < tokens . length ; j ++ ) {
219+ fixedAfter += String ( ( ( tokens [ j ] . content ?? '' ) + ( tokens [ j ] . markup ?? '' ) ) )
220+ }
221+ i = tokens . length - 1
222+ handleToken ( {
223+ type : 'text' ,
224+ content : fixedAfter ,
225+ raw : String ( fixedAfter ?? '' ) ,
226+ } )
227+ }
228+ }
237229 i ++
238230 return true
239231 }
@@ -871,79 +863,3 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
871863
872864 return result
873865}
874-
/**
 * Minimal fallback parser that rebuilds inline nodes directly from a raw string.
 *
 * Only two constructs are recognised; everything else becomes text:
 *  - a pair of backticks produces an `inline_code` node whose `code` and `raw`
 *    both hold the text between the backticks (backticks excluded);
 *  - a pair of `**` markers produces a `strong` node whose children are parsed
 *    with the same rules, so code may appear inside strong.
 *
 * Degenerate input:
 *  - a backtick without a closing partner turns the rest of the string into
 *    plain text and stops parsing;
 *  - a `**` that is never closed is emitted as the literal text `**`, and the
 *    nodes parsed after it are kept as siblings (inline code stays inline code).
 *
 * @param raw Raw inline markdown source.
 * @returns Flat list of parsed nodes; adjacent text fragments are merged.
 */
function parseFromRawWithCodeAndStrong(raw: string): ParsedNode[] {
  const root: ParsedNode[] = []
  // Stack of open containers; the top entry receives newly parsed nodes.
  const stack: Array<{ type: 'root' | 'strong', children: ParsedNode[] }> = [{ type: 'root', children: root }]
  let i = 0

  // Children list of the innermost open container.
  function cur() {
    return stack[stack.length - 1].children
  }

  // Append text to the current container, extending a trailing text node if any.
  function pushText(s: string) {
    if (!s)
      return
    const siblings = cur()
    const last = siblings[siblings.length - 1]
    if (last && last.type === 'text') {
      (last as any).content += s
      ;(last as any).raw += s
    }
    else {
      siblings.push({ type: 'text', content: s, raw: s } as ParsedNode)
    }
  }

  while (i < raw.length) {
    // `**` toggles strong: it closes the open strong, otherwise opens a new one.
    if (raw.startsWith('**', i)) {
      const isClosing = stack.length > 1 && stack[stack.length - 1].type === 'strong'
      i += 2
      if (isClosing) {
        const nodeChildren = stack.pop()!.children
        const innerRaw = nodeChildren.map(n => (n as any).raw ?? '').join('')
        cur().push({ type: 'strong', children: nodeChildren, raw: `**${innerRaw}**` } as ParsedNode)
      }
      else {
        stack.push({ type: 'strong', children: [] })
      }
      continue
    }

    // Inline code: everything up to the next backtick, taken verbatim.
    if (raw[i] === '`') {
      const close = raw.indexOf('`', i + 1)
      if (close === -1) {
        // No closing tick: the remainder (including the tick) is plain text.
        pushText(raw.slice(i))
        break
      }
      const code = raw.slice(i + 1, close)
      cur().push({ type: 'inline_code', code, raw: code } as ParsedNode)
      i = close + 1
      continue
    }

    // Regular text: consume up to the next backtick or `**`, whichever is first.
    let next = raw.indexOf('`', i)
    const nextStrong = raw.indexOf('**', i)
    if (nextStrong !== -1 && (next === -1 || nextStrong < next))
      next = nextStrong
    if (next === -1)
      next = raw.length
    pushText(raw.slice(i, next))
    i = next
  }

  // Unclosed strong: keep the literal `**` as text and hand the already parsed
  // children to the parent unchanged. Flattening them into a string would drop
  // inline_code nodes together with their backticks.
  while (stack.length > 1) {
    const dangling = stack.pop()!
    pushText('**')
    for (const child of dangling.children) {
      if (child.type === 'text')
        pushText(String((child as any).content ?? ''))
      else
        cur().push(child)
    }
  }

  return root
}
0 commit comments