@@ -4,7 +4,7 @@ use diagnostics::make_unclosed_delims_error;
44use rustc_ast:: ast:: { self , AttrStyle } ;
55use rustc_ast:: token:: { self , CommentKind , Delimiter , IdentIsRaw , Token , TokenKind } ;
66use rustc_ast:: tokenstream:: TokenStream ;
7- use rustc_ast:: util:: unicode:: contains_text_flow_control_chars;
7+ use rustc_ast:: util:: unicode:: { TEXT_FLOW_CONTROL_CHARS , contains_text_flow_control_chars} ;
88use rustc_errors:: codes:: * ;
99use rustc_errors:: { Applicability , Diag , DiagCtxtHandle , StashKey } ;
1010use rustc_lexer:: {
@@ -14,7 +14,7 @@ use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode}
1414use rustc_session:: lint:: BuiltinLintDiag ;
1515use rustc_session:: lint:: builtin:: {
1616 RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX , RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX ,
17- TEXT_DIRECTION_CODEPOINT_IN_COMMENT ,
17+ TEXT_DIRECTION_CODEPOINT_IN_COMMENT , TEXT_DIRECTION_CODEPOINT_IN_LITERAL ,
1818} ;
1919use rustc_session:: parse:: ParseSess ;
2020use rustc_span:: { BytePos , Pos , Span , Symbol , sym} ;
@@ -174,6 +174,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
174174 // Opening delimiter of the length 3 is not included into the symbol.
175175 let content_start = start + BytePos ( 3 ) ;
176176 let content = self . str_from ( content_start) ;
177+ self . lint_doc_comment_unicode_text_flow ( start, content) ;
177178 self . cook_doc_comment ( content_start, content, CommentKind :: Line , doc_style)
178179 }
179180 rustc_lexer:: TokenKind :: BlockComment { doc_style, terminated } => {
@@ -193,6 +194,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
193194 let content_start = start + BytePos ( 3 ) ;
194195 let content_end = self . pos - BytePos ( if terminated { 2 } else { 0 } ) ;
195196 let content = self . str_from_to ( content_start, content_end) ;
197+ self . lint_doc_comment_unicode_text_flow ( start, content) ;
196198 self . cook_doc_comment ( content_start, content, CommentKind :: Block , doc_style)
197199 }
198200 rustc_lexer:: TokenKind :: Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
@@ -287,6 +289,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
287289 } else {
288290 None
289291 } ;
292+ self . lint_literal_unicode_text_flow ( symbol, kind, self . mk_sp ( start, self . pos ) , "literal" ) ;
290293 token:: Literal ( token:: Lit { kind, symbol, suffix } )
291294 }
292295 rustc_lexer:: TokenKind :: Lifetime { starts_with_number } => {
@@ -481,6 +484,88 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
481484 }
482485 }
483486
487+ fn lint_doc_comment_unicode_text_flow ( & mut self , start : BytePos , content : & str ) {
488+ if contains_text_flow_control_chars ( content) {
489+ self . report_text_direction_codepoint (
490+ content,
491+ self . mk_sp ( start, self . pos ) ,
492+ 0 ,
493+ false ,
494+ "doc comment" ,
495+ ) ;
496+ }
497+ }
498+
499+ fn lint_literal_unicode_text_flow (
500+ & mut self ,
501+ text : Symbol ,
502+ lit_kind : token:: LitKind ,
503+ span : Span ,
504+ label : & ' static str ,
505+ ) {
506+ if !contains_text_flow_control_chars ( text. as_str ( ) ) {
507+ return ;
508+ }
509+ let ( padding, point_at_inner_spans) = match lit_kind {
510+ // account for `"` or `'`
511+ token:: LitKind :: Str | token:: LitKind :: Char => ( 1 , true ) ,
512+ // account for `c"`
513+ token:: LitKind :: CStr => ( 2 , true ) ,
514+ // account for `r###"`
515+ token:: LitKind :: StrRaw ( n) => ( n as u32 + 2 , true ) ,
516+ // account for `cr###"`
517+ token:: LitKind :: CStrRaw ( n) => ( n as u32 + 3 , true ) ,
518+ // suppress bad literals.
519+ token:: LitKind :: Err ( _) => return ,
520+ // Be conservative just in case new literals do support these.
521+ _ => ( 0 , false ) ,
522+ } ;
523+ self . report_text_direction_codepoint (
524+ text. as_str ( ) ,
525+ span,
526+ padding,
527+ point_at_inner_spans,
528+ label,
529+ ) ;
530+ }
531+
532+ fn report_text_direction_codepoint (
533+ & self ,
534+ text : & str ,
535+ span : Span ,
536+ padding : u32 ,
537+ point_at_inner_spans : bool ,
538+ label : & str ,
539+ ) {
540+ // Obtain the `Span`s for each of the forbidden chars.
541+ let spans: Vec < _ > = text
542+ . char_indices ( )
543+ . filter_map ( |( i, c) | {
544+ TEXT_FLOW_CONTROL_CHARS . contains ( & c) . then ( || {
545+ let lo = span. lo ( ) + BytePos ( i as u32 + padding) ;
546+ ( c, span. with_lo ( lo) . with_hi ( lo + BytePos ( c. len_utf8 ( ) as u32 ) ) )
547+ } )
548+ } )
549+ . collect ( ) ;
550+
551+ let count = spans. len ( ) ;
552+ let labels = point_at_inner_spans. then_some ( spans. clone ( ) ) ;
553+
554+ self . psess . buffer_lint (
555+ TEXT_DIRECTION_CODEPOINT_IN_LITERAL ,
556+ span,
557+ ast:: CRATE_NODE_ID ,
558+ BuiltinLintDiag :: HiddenUnicodeCodepoints {
559+ label : label. to_string ( ) ,
560+ count,
561+ span_label : span,
562+ labels,
563+ escape : point_at_inner_spans && !spans. is_empty ( ) ,
564+ spans,
565+ } ,
566+ ) ;
567+ }
568+
484569 fn validate_frontmatter (
485570 & self ,
486571 start : BytePos ,
0 commit comments