@@ -46,6 +46,22 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
4646 const result : ParsedNode [ ] = [ ]
4747 let currentTextNode : TextNode | null = null
4848
49+ // Fallback: markdown-it sometimes mis-tokenizes code_inline around CJK text
50+ // yielding code_inline nodes that contain non-ASCII prose (e.g. "方法会根据").
51+ // When detected, prefer re-parsing from the original raw string to rebuild
52+ // inline_code spans and basic strong emphasis segments in a stable way.
53+ const hasSuspiciousCodeInline = tokens . some ( t => t . type === 'code_inline' && / [ ^ \x00 - \x7F ] / . test ( String ( t . content ?? '' ) ) )
54+ const hasBackticksInRaw = typeof raw === 'string' && / ` / . test ( raw )
55+ const codeInlineCount = tokens . reduce ( ( n , t ) => n + ( t . type === 'code_inline' ? 1 : 0 ) , 0 )
56+ const rawBacktickCount = typeof raw === 'string' ? ( ( raw . match ( / ` / g) || [ ] ) . length ) : 0
57+ // When backtick count is even, expected inline-code spans ~= backtickCount/2.
58+ const expectedSpans = rawBacktickCount % 2 === 0 ? rawBacktickCount / 2 : 0
59+ const mismatchedSpanCount = expectedSpans > 0 && codeInlineCount !== expectedSpans
60+ const hasMathInlineUsingBackticks = tokens . some ( t => t . type === 'math_inline' && / ` / . test ( String ( ( t as any ) . raw ?? t . content ?? '' ) ) )
61+ if ( hasBackticksInRaw && ( hasSuspiciousCodeInline || mismatchedSpanCount || hasMathInlineUsingBackticks ) ) {
62+ return parseFromRawWithCodeAndStrong ( String ( raw ?? '' ) )
63+ }
64+
4965 let i = 0
5066 // Note: strong-token normalization and list-item normalization are
5167 // applied during markdown-it parsing via core rules (plugins that
@@ -174,58 +190,50 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
174190 if ( ! / ` [ ^ ` ] * / . test ( content ) )
175191 return false
176192
177- // Close any current text node and handle inline code
178- resetCurrentTextNode ( )
179193 const code_start = content . indexOf ( '`' )
180194 const code_end = content . indexOf ( '`' , code_start + 1 )
181- const _text = content . slice ( 0 , code_start )
182- const codeContent = code_end === - 1 ? content . slice ( code_start ) : content . slice ( code_start , code_end )
183- const after = code_end === - 1 ? '' : content . slice ( code_end + 1 )
184- if ( _text ) {
185- // Try to re-run emphasis/strong parsing on the fragment before the code span
186- // but avoid mutating the outer token index `i` (handlers sometimes increment it).
187- const handled = handleEmphasisAndStrikethrough ( _text , _token )
188- // restore index so we don't skip tokens in the outer loop
189- if ( ! handled ) {
190- pushText ( _text , _text )
191- }
192- else {
195+
196+ // If we don't have a closing backtick within this token, don't emit a partial
197+ // inline_code node. Instead, merge the rest of inline tokens into a single
198+ // string and re-run parsing so the code span is handled atomically.
199+ if ( code_end === - 1 ) {
200+ let merged = content
201+ for ( let j = i + 1 ; j < tokens . length ; j ++ )
202+ merged += String ( ( tokens [ j ] . content ?? '' ) + ( tokens [ j ] . markup ?? '' ) )
203+
204+ // Consume to the end since we've merged remaining tokens
205+ i = tokens . length - 1
206+ handleToken ( { type : 'text' , content : merged , raw : merged } as unknown as MarkdownToken )
207+ i ++
208+ return true
209+ }
210+
211+ // Close any current text node and handle the text before the code span
212+ resetCurrentTextNode ( )
213+ const beforeText = content . slice ( 0 , code_start )
214+ const codeContent = content . slice ( code_start + 1 , code_end )
215+ const after = content . slice ( code_end + 1 )
216+
217+ if ( beforeText ) {
218+ // Try to parse emphasis/strong inside the pre-code fragment, without
219+ // advancing the outer token index `i` permanently.
220+ const handled = handleEmphasisAndStrikethrough ( beforeText , _token )
221+ if ( ! handled )
222+ pushText ( beforeText , beforeText )
223+ else
193224 i --
194- }
195225 }
196226
197- const code = codeContent . replace ( / ` / g, '' )
198227 pushParsed ( {
199228 type : 'inline_code' ,
200- code,
201- raw : String ( code ?? '' ) ,
229+ code : codeContent ,
230+ raw : String ( codeContent ?? '' ) ,
202231 } as ParsedNode )
203232
204- // afterCode 可能也存在很多情况包括多个 code,我们递归处理 --- IGNORE ---
205233 if ( after ) {
206- handleToken ( {
207- type : 'text' ,
208- content : after ,
209- raw : String ( after ?? '' ) ,
210- } )
234+ handleToken ( { type : 'text' , content : after , raw : after } as unknown as MarkdownToken )
211235 i --
212236 }
213- else if ( code_end === - 1 ) {
214- // 要把下一个 token 也合并进来,把类型变成 text
215- const nextToken = tokens [ i + 1 ]
216- if ( nextToken ) {
217- let fixedAfter = after
218- for ( let j = i + 1 ; j < tokens . length ; j ++ ) {
219- fixedAfter += String ( ( ( tokens [ j ] . content ?? '' ) + ( tokens [ j ] . markup ?? '' ) ) )
220- }
221- i = tokens . length - 1
222- handleToken ( {
223- type : 'text' ,
224- content : fixedAfter ,
225- raw : String ( fixedAfter ?? '' ) ,
226- } )
227- }
228- }
229237 i ++
230238 return true
231239 }
@@ -863,3 +871,79 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
863871
864872 return result
865873}
874+
/**
 * Minimal fallback inline parser that works directly on the raw source text.
 *
 * It recognises exactly two constructs:
 *  - inline code delimited by a run of N backticks and closed by the next run
 *    of exactly N backticks (so a double-backtick span may contain a single
 *    backtick);
 *  - `**strong**`, which may contain text and inline code. A `**` seen while a
 *    strong is open always closes it, so strongs never nest.
 *
 * Everything else is emitted as `text` nodes; adjacent text is merged.
 *
 * Degradation rules:
 *  - A backtick run without a matching closing run is literal text. If no
 *    backtick follows it at all, the whole remainder of `raw` is emitted as
 *    text and scanning stops (so `**` after a still-open code span is not
 *    interpreted). Otherwise only the run itself is literal and scanning
 *    continues, so later well-formed spans are still recognised.
 *  - An unclosed `**` becomes the literal text `**` and its children are
 *    re-attached to the parent unchanged, so nested `inline_code` nodes survive.
 *
 * Code content is kept verbatim: no padding-space stripping is applied.
 *
 * @param raw Raw inline markdown source.
 * @returns Flat list of `text`, `inline_code` and `strong` nodes.
 */
function parseFromRawWithCodeAndStrong(raw: string): ParsedNode[] {
  const root: ParsedNode[] = []
  // `start` is the index in `raw` of the opening `**` of a strong frame; it is
  // used to slice the exact source text for the node's `raw` when it closes.
  const stack: Array<{ type: 'root' | 'strong', children: ParsedNode[], start: number }> = [
    { type: 'root', children: root, start: 0 },
  ]
  let i = 0

  /** Children list of the innermost open container. */
  function cur() {
    return stack[stack.length - 1].children
  }

  /** Append text to the current container, merging with a trailing text node. */
  function pushText(s: string) {
    if (!s)
      return
    const siblings = cur()
    const last = siblings[siblings.length - 1]
    if (last && last.type === 'text') {
      const textNode = last as any
      textNode.content += s
      textNode.raw += s
    }
    else {
      siblings.push({ type: 'text', content: s, raw: s } as ParsedNode)
    }
  }

  /** Number of consecutive backticks starting at `from`. */
  function backtickRunLength(from: number): number {
    let end = from
    while (end < raw.length && raw[end] === '`')
      end++
    return end - from
  }

  /**
   * Index of the first backtick run at or after `from` whose length is exactly
   * `len`, or -1. Runs of a different length are skipped as a whole so that a
   * longer run is never mistaken for a shorter closer.
   */
  function findClosingRun(from: number, len: number): number {
    let pos = raw.indexOf('`', from)
    while (pos !== -1) {
      const runLen = backtickRunLength(pos)
      if (runLen === len)
        return pos
      pos = raw.indexOf('`', pos + runLen)
    }
    return -1
  }

  while (i < raw.length) {
    // strong open/close
    if (raw[i] === '*' && raw[i + 1] === '*') {
      const top = stack[stack.length - 1]
      const markerStart = i
      i += 2
      if (top.type === 'strong') {
        // Already inside strong: this marker closes it.
        stack.pop()
        cur().push({
          type: 'strong',
          children: top.children,
          // Slice the source so `raw` round-trips even when children are code
          // spans (whose own `raw` carries no backticks).
          raw: raw.slice(top.start, i),
        } as ParsedNode)
      }
      else {
        stack.push({ type: 'strong', children: [], start: markerStart })
      }
      continue
    }

    // inline code
    if (raw[i] === '`') {
      const runLen = backtickRunLength(i)
      const bodyStart = i + runLen
      const close = findClosingRun(bodyStart, runLen)
      if (close === -1) {
        if (raw.indexOf('`', bodyStart) === -1) {
          // No backtick left at all: treat the remainder as text and stop.
          pushText(raw.slice(i))
          break
        }
        // Only this run is unmatched; keep it literal and keep scanning.
        pushText(raw.slice(i, bodyStart))
        i = bodyStart
        continue
      }
      const code = raw.slice(bodyStart, close)
      cur().push({ type: 'inline_code', code, raw: code } as ParsedNode)
      i = close + runLen
      continue
    }

    // regular text: read until the next backtick or `**`
    let next = raw.indexOf('`', i)
    const nextStrong = raw.indexOf('**', i)
    if (nextStrong !== -1 && (next === -1 || nextStrong < next))
      next = nextStrong
    if (next === -1)
      next = raw.length
    pushText(raw.slice(i, next))
    i = next
  }

  // Unclosed strong: emit the `**` marker as text and hand the children back
  // to the parent as-is instead of flattening them into a string.
  while (stack.length > 1) {
    const dangling = stack.pop()
    if (!dangling)
      break
    pushText('**')
    for (const child of dangling.children) {
      if (child.type === 'text')
        pushText(String((child as any).content ?? ''))
      else
        cur().push(child)
    }
  }

  return root
}
0 commit comments