@@ -4,9 +4,18 @@ use memchr::memchr2_iter;
44use std:: borrow:: Cow ;
55use std:: ops:: Range ;
66
7+ use jetscii:: bytes;
8+ use memchr;
9+ use once_cell:: sync:: Lazy ;
10+
711#[ cfg( test) ]
812use pretty_assertions:: assert_eq;
913
14+
15+ static XML_ESCAPE_BYTES : Lazy < jetscii:: BytesConst > =
16+ Lazy :: new ( || bytes ! ( b'<' , b'>' , b'&' , b'\'' , b'"' ) ) ;
17+ static XML_PARTIAL_ESCAPE_BYTES : Lazy < jetscii:: BytesConst > = Lazy :: new ( || bytes ! ( b'<' , b'>' , b'&' ) ) ;
18+
1019/// Error for XML escape / unescape.
1120#[ derive( Clone , Debug ) ]
1221pub enum EscapeError {
@@ -72,7 +81,8 @@ impl std::error::Error for EscapeError {}
7281/// | `'` | `&apos;`
7382/// | `"` | `&quot;`
7483pub fn escape ( raw : & str ) -> Cow < str > {
75- _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' | b'\'' | b'\"' ) )
84+ // _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"'))
85+ simd_escape ( raw, & XML_ESCAPE_BYTES )
7686}
7787
7888/// Escapes an `&str` and replaces xml special characters (`<`, `>`, `&`)
@@ -89,9 +99,11 @@ pub fn escape(raw: &str) -> Cow<str> {
8999/// | `>` | `&gt;`
90100/// | `&` | `&amp;`
91101pub fn partial_escape ( raw : & str ) -> Cow < str > {
92- _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' ) )
102+ // _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&'))
103+ simd_escape ( raw, & XML_PARTIAL_ESCAPE_BYTES )
93104}
94105
106+
95107/// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`,
96108/// `&`, `'`, `"`) with their corresponding xml escaped value.
97109pub ( crate ) fn _escape < F : Fn ( u8 ) -> bool > ( raw : & str , escape_chars : F ) -> Cow < str > {
@@ -121,7 +133,47 @@ pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str>
121133 b'\r' => escaped. extend_from_slice ( b" " ) ,
122134 b' ' => escaped. extend_from_slice ( b" " ) ,
123135 _ => unreachable ! (
124- "Only '<', '>','\' , '&', '\" ', '\\ t', '\\ r', '\\ n', and ' ' are escaped"
136+ "Only '<', '>','\' , '&', '\" ', '\\ t', '\\ r', '\\ n', and ' ' are escaped" ) ,
137+ }
138+ pos = new_pos + 1 ;
139+ }
140+
141+ if let Some ( mut escaped) = escaped {
142+ if let Some ( raw) = bytes. get ( pos..) {
143+ escaped. extend_from_slice ( raw) ;
144+ }
145+ // SAFETY: we operate on UTF-8 input and search for one-byte chars only,
146+ // so all slices that were put into `escaped` are valid UTF-8 encoded strings
147+ // TODO: Can be replaced with `unsafe { String::from_utf8_unchecked() }`
148+ // if unsafe code is allowed
149+ Cow :: Owned ( String :: from_utf8 ( escaped) . unwrap ( ) )
150+ } else {
151+ Cow :: Borrowed ( raw)
152+ }
153+ }
154+
155+ /// Escapes a `&str` and replaces the xml special characters (`<`, `>`, `&`, `'`, `"`)
156+ /// found by `escape_matcher` with their corresponding xml escaped value.
157+ pub fn simd_escape < ' a > ( raw : & ' a str , escape_matcher : & jetscii:: BytesConst ) -> Cow < ' a , str > {
158+ let bytes = raw. as_bytes ( ) ;
159+ let mut escaped = None ;
160+ let mut pos = 0 ;
161+ while let Some ( i) = escape_matcher. find ( & bytes[ pos..] ) {
162+ if escaped. is_none ( ) {
163+ escaped = Some ( Vec :: with_capacity ( raw. len ( ) ) ) ;
164+ }
165+ let escaped = escaped. as_mut ( ) . expect ( "initialized" ) ;
166+ let new_pos = pos + i;
167+ escaped. extend_from_slice ( & bytes[ pos..new_pos] ) ;
168+ match bytes[ new_pos] {
169+ b'<' => escaped. extend_from_slice ( b"<" ) ,
170+ b'>' => escaped. extend_from_slice ( b">" ) ,
171+ b'\'' => escaped. extend_from_slice ( b"'" ) ,
172+ b'&' => escaped. extend_from_slice ( b"&" ) ,
173+ b'"' => escaped. extend_from_slice ( b""" ) ,
174+ c @ _ => unreachable ! (
175+ "Found {} but only '<', '>', ', '&' and '\" ' are escaped" ,
176+ c as char
125177 ) ,
126178 }
127179 pos = new_pos + 1 ;
0 commit comments