@@ -494,25 +494,22 @@ class V2exParser {
494494
495495 // 方法1: 从script标签中提取
496496 const scriptContent = $ ( 'script' ) . text ( ) ;
497- const addressMatch = scriptContent . match ( / c o n s t a d d r e s s = " ( [ A - Z a - z 0 - 9 ] { 32 - 4 4 } ) " / ) ;
497+ const addressMatch = scriptContent . match ( / c o n s t a d d r e s s = " ( [ A - Z a - z 0 - 9 ] { 32 , 44 } ) " / ) ;
498498 if ( addressMatch ) {
499499 result . solanaAddress = addressMatch [ 1 ] ;
500500 }
501501
502- // 方法2: 从页面文本中查找Solana地址格式
502+ // 方法2: 使用更稳健的文本提取(带边界与URL过滤)
503503 const pageText = $ . text ( ) ;
504- const solanaAddressRegex = / [ 1 - 9 A - H J - N P - Z a - k m - z ] { 32 , 44 } / g;
505- const addresses = pageText . match ( solanaAddressRegex ) ;
506- if ( addresses && addresses . length > 0 ) {
507- // 过滤掉可能的其他Base58编码字符串
508- const validAddress = addresses . find ( addr => addr . length >= 32 && addr . length <= 44 ) ;
509- if ( validAddress ) {
510- result . solanaAddress = validAddress ;
511- }
504+ const sanitizedText = this . removeUrlTransactionSignatures ( pageText ) ;
505+ const extracted = this . extractSolanaAddressesWithBoundary ( sanitizedText ) ;
506+ if ( extracted && extracted . length > 0 ) {
507+ result . solanaAddress = result . solanaAddress || extracted [ 0 ] ;
512508 }
513509
514510 // 提取sol域名
515- result . solanaDomain = this . extractSolanaDomain ( pageText ) ;
511+ const domains = this . extractSolanaDomainsFromText ( pageText ) ;
512+ result . solanaDomain = domains && domains . length > 0 ? domains [ 0 ] : null ;
516513
517514 return result ;
518515 }
@@ -524,26 +521,27 @@ class V2exParser {
524521 */
525522 extractSolanaDomain ( text ) {
526523 if ( ! text ) return null ; // falsy input (null, undefined, '') yields null
524+ const domains = this . extractSolanaDomainsFromText ( text ) ; // delegate matching to the list-returning helper
525+ return domains && domains . length > 0 ? domains [ 0 ] : null ; // first entry when the list is non-empty, otherwise null
526+ }
527527
528- // 匹配.sol域名的正则表达式
529- // 确保域名前后没有字符(空白字符除外)
530- const domainRegex = / (?< ! \S ) \. s o l (? = \s | $ ) / g;
531- const matches = text . match ( domainRegex ) ;
532-
533- if ( matches && matches . length > 0 ) {
534- // 返回第一个匹配的域名
535- return matches [ 0 ] ;
536- }
537-
538- // 如果没有找到.sol,尝试查找其他可能的域名格式
539- const generalDomainRegex = / (?< ! \S ) ( [ a - z A - Z 0 - 9 - ] + \. s o l ) (? = \s | $ ) / g;
540- const generalMatches = text . match ( generalDomainRegex ) ;
541-
542- if ( generalMatches && generalMatches . length > 0 ) {
543- return generalMatches [ 0 ] ;
528+ /**
529+ * Strip transaction-style URLs from the text so that a signature embedded in such a URL is not later mistaken for an address.
530+ * Matches http(s) URLs with a /tx/, /transaction/, /confirmTransaction/ or /txs/ path segment, e.g. https://solscan.io/tx/<signature>, https://explorer.solana.com/tx/<signature>
531+ * @param {string } text Text to sanitize; a falsy value yields ''.
532+ * @returns {string } The text with every matching URL replaced by a single space.
533+ */
534+ removeUrlTransactionSignatures ( text ) {
535+ if ( ! text ) return '' ; // nothing to sanitize
536+ const patterns = [
537+ / h t t p s ? : \/ \/ [ ^ \s ] * \/ ( t x | t r a n s a c t i o n | c o n f i r m T r a n s a c t i o n ) \/ [ ^ \s ] * / gi,
538+ / h t t p s ? : \/ \/ [ ^ \s ] * \/ t x s \/ [ ^ \s ] * / gi
539+ ] ;
540+ let sanitized = text ;
541+ for ( const p of patterns ) {
542+ sanitized = sanitized . replace ( p , ' ' ) ; // patterns carry the g flag, so every occurrence is replaced
544543 }
545-
546- return null ;
544+ return sanitized ;
547545 }
548546
549547 /**
@@ -645,7 +643,7 @@ class V2exParser {
645643
646644 // 匹配.sol域名的正则表达式
647645 // 确保域名前后没有字符(空白字符除外)
648- const domainRegex = / (?< ! \S ) ( [ a - z A - Z 0 - 9 - ] + \. s o l ) (? = \s | $ ) / g;
646+ const domainRegex = / (?< ! \S ) ( [ a - z A - Z 0 - 9 _ - ] + \. s o l ) (? = \s | $ ) / g;
649647 const matches = text . match ( domainRegex ) ;
650648
651649 if ( matches && matches . length > 0 ) {
@@ -657,9 +655,9 @@ class V2exParser {
657655 // 确保以.sol结尾
658656 if ( ! domain . endsWith ( '.sol' ) ) return false ;
659657
660- // 确保域名部分只包含字母、数字和连字符
658+ // 确保域名部分只包含字母、数字、下划线和连字符
661659 const domainPart = domain . replace ( '.sol' , '' ) ;
662- if ( ! / ^ [ a - z A - Z 0 - 9 - ] + $ / . test ( domainPart ) ) return false ;
660+ if ( ! / ^ [ a - z A - Z 0 - 9 _ - ] + $ / . test ( domainPart ) ) return false ;
663661
664662 return true ;
665663 } ) ;
@@ -687,20 +685,21 @@ class V2exParser {
687685
688686 const addresses = [ ] ;
689687
690- // 使用更精确的正则表达式,确保地址前后有边界
691- // 边界可以是:行首、行尾、空格、标点符号等
692- const addressRegex = / (?< ! \S ) ( [ 1 - 9 A - H J - N P - Z a - k m - z ] { 32 , 44 } ) (? = \s | $ | [ ^ \w ] ) / g;
688+ // 使用更稳健的正则表达式,允许中文标点/文字作为边界
689+ // 左边界:行首或非Base58字符;右边界:非Base58字符或行尾
690+ const addressRegex = / (?: ^ | [ ^ 1 - 9 A - H J - N P - Z a - k m - z ] ) ( [ 1 - 9 A - H J - N P - Z a - k m - z ] { 32 , 44 } ) (? ! [ 1 - 9 A - H J - N P - Z a - k m - z ] ) / g;
693691
694692 let match ;
695693 while ( ( match = addressRegex . exec ( text ) ) !== null ) {
696694 const address = match [ 1 ] ;
697695
698- // 验证地址的有效性
699- if ( this . isValidSolanaAddress ( address ) ) {
700- // 进一步检查:确保不是URL的一部分
701- if ( ! this . isPartOfUrl ( text , match . index , address . length ) ) {
702- addresses . push ( address ) ;
703- }
696+ // 计算地址在原文本中的真实起始位置(排除左边界占位字符)
697+ const precedingOffset = match [ 0 ] . length - match [ 1 ] . length ;
698+ const addressStartIndex = match . index + precedingOffset ;
699+
700+ // 验证地址的有效性并确保不是URL的一部分
701+ if ( this . isValidSolanaAddress ( address ) && ! this . isPartOfUrl ( text , addressStartIndex , address . length ) ) {
702+ addresses . push ( address ) ;
704703 }
705704 }
706705
@@ -723,8 +722,8 @@ class V2exParser {
723722 */
724723 isPartOfUrl ( text , startIndex , addressLength ) {
725724 // Inspect the text on both sides of the address for URL markers
726- const beforeText = text . substring ( Math . max ( 0 , startIndex - 20 ) , startIndex ) ;
727- const afterText = text . substring ( startIndex + addressLength , Math . min ( text . length , startIndex + addressLength + 20 ) ) ;
725+ const beforeText = text . substring ( Math . max ( 0 , startIndex - 200 ) , startIndex ) ;
726+ const afterText = text . substring ( startIndex + addressLength , Math . min ( text . length , startIndex + addressLength + 200 ) ) ;
728727
729728 // URL markers: http, https, www, .com, .io, etc. (the pattern entries themselves are not visible in this hunk)
730729 const urlPatterns = [
@@ -737,6 +736,12 @@ class V2exParser {
737736
738737 const combinedText = beforeText + afterText ;
739738
739+ // Immediate-neighbour check so window truncation cannot cause a miss. NOTE(review): immediateBefore holds at most 4 characters, so the '/address/' and 'address/' comparisons below can never be true and 'tx/' only matches when startIndex is 3 - widen the slice or test with endsWith
740+ const immediateBefore = text . substring ( Math . max ( 0 , startIndex - 4 ) , startIndex ) ;
741+ if ( immediateBefore === '/tx/' || immediateBefore === 'tx/' || immediateBefore === '/address/' || immediateBefore === 'address/' ) {
742+ return true ;
743+ }
744+
740745 return urlPatterns . some ( pattern => pattern . test ( combinedText ) ) ;
741746 }
742747
0 commit comments