rust-bakery · micolous · Aug 31, 2025 · Aug 31, 2025 · Aug 31, 2025 · Aug 31, 2025
@@ -417,7 +417,7 @@ where
 ///
 pub fn escaped<'a, I, Error, F, G>(
   normal: F,
-  control_char: char,
+  control_char: impl crate::traits::AsChar,
   escapable: G,
 ) -> impl FnMut(I) -> IResult<I, I, Error>
 where
@@ -467,7 +467,7 @@ where
 #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
 pub fn escaped_transform<I, Error, F, G, O1, O2, ExtendItem, Output>(
   normal: F,
-  control_char: char,
+  control_char: impl crate::traits::AsChar,
   transform: G,
 ) -> impl FnMut(I) -> IResult<I, Output, Error>
 where

@@ -725,7 +725,7 @@ where
 ///
 pub fn escaped<I, Error, F, G>(
   normal: F,
-  control_char: char,
+  control_char: impl AsChar,
   escapable: G,
 ) -> impl Parser<I, Output = I, Error = Error>
 where
@@ -744,17 +744,18 @@ where
 }
 
 /// Parser implementation for [escaped]
-pub struct Escaped<F, G, E> {
+pub struct Escaped<F, G, C, E> {
   normal: F,
   escapable: G,
-  control_char: char,
+  control_char: C,
   e: PhantomData<E>,
 }
 
-impl<I, Error: ParseError<I>, F, G> Parser<I> for Escaped<F, G, Error>
+impl<I, Error: ParseError<I>, F, G, C> Parser<I> for Escaped<F, G, C, Error>
 where
   I: Input + Clone + crate::traits::Offset,
   <I as Input>::Item: crate::traits::AsChar,
+  C: crate::traits::AsChar,
   F: Parser<I, Error = Error>,
   G: Parser<I, Error = Error>,
   Error: ParseError<I>,
@@ -798,8 +799,8 @@ where
         }
         Err(Err::Error(_)) => {
           // unwrap() should be safe here since `i` still has input left (NOTE(review): the guard is outside this hunk — confirm)
-          if i.iter_elements().next().unwrap().as_char() == self.control_char {
-            let next = self.control_char.len_utf8();
+          if i.iter_elements().next().unwrap().as_char() == self.control_char.as_char() {
+            let next = self.control_char.len();
             if next >= i.input_len() {
               if OM::Incomplete::is_streaming() {
                 return Err(Err::Incomplete(Needed::new(1)));
@@ -900,7 +901,7 @@ where
 #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
 pub fn escaped_transform<I, Error, F, G, ExtendItem, Output>(
   normal: F,
-  control_char: char,
+  control_char: impl AsChar,
   transform: G,
 ) -> impl Parser<I, Output = Output, Error = Error>
 where
@@ -924,23 +925,24 @@ where
 }
 
 /// Parser implementation for [escaped_transform]
-pub struct EscapedTransform<F, G, E, ExtendItem, Output> {
+pub struct EscapedTransform<F, G, C, E, ExtendItem, Output> {
   normal: F,
   transform: G,
-  control_char: char,
+  control_char: C,
   e: PhantomData<E>,
   extend: PhantomData<ExtendItem>,
   o: PhantomData<Output>,
 }
 
-impl<I, Error: ParseError<I>, F, G, ExtendItem, Output> Parser<I>
-  for EscapedTransform<F, G, Error, ExtendItem, Output>
+impl<I, Error: ParseError<I>, F, G, C, ExtendItem, Output> Parser<I>
+  for EscapedTransform<F, G, C, Error, ExtendItem, Output>
 where
   I: Clone + crate::traits::Offset + Input,
   I: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
   <F as Parser<I>>::Output: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
   <G as Parser<I>>::Output: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
   <I as Input>::Item: crate::traits::AsChar,
+  C: crate::traits::AsChar,
   F: Parser<I, Error = Error>,
   G: Parser<I, Error = Error>,
   Error: ParseError<I>,
@@ -979,8 +981,8 @@ where
         }
         Err(Err::Error(_)) => {
           // unwrap() should be safe here since index < input.input_len()
-          if remainder.iter_elements().next().unwrap().as_char() == self.control_char {
-            let next = index + self.control_char.len_utf8();
+          if remainder.iter_elements().next().unwrap().as_char() == self.control_char.as_char() {
+            let next = index + self.control_char.len();
             let input_len = input.input_len();
 
             if next >= input_len {

@@ -431,7 +431,7 @@ where
 ///
 pub fn escaped<I, Error, F, G>(
   normal: F,
-  control_char: char,
+  control_char: impl crate::traits::AsChar,
   escapable: G,
 ) -> impl FnMut(I) -> IResult<I, I, Error>
 where
@@ -480,7 +480,7 @@ where
 #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
 pub fn escaped_transform<I, Error, F, G, O1, O2, ExtendItem, Output>(
   normal: F,
-  control_char: char,
+  control_char: impl crate::traits::AsChar,
   transform: G,
 ) -> impl FnMut(I) -> IResult<I, Output, Error>
 where

@@ -73,7 +73,7 @@ fn escaping() {
   use crate::character::streaming::one_of;
 
   fn esc(i: &[u8]) -> IResult<&[u8], &[u8]> {
-    escaped(alpha, '\\', one_of("\"n\\"))(i)
+    escaped(alpha, b'\\', one_of(b"\"n\\".as_slice()))(i)
   }
   assert_eq!(esc(&b"abcd;"[..]), Ok((&b";"[..], &b"abcd"[..])));
   assert_eq!(esc(&b"ab\\\"cd;"[..]), Ok((&b";"[..], &b"ab\\\"cd"[..])));
@@ -97,9 +97,27 @@ fn escaping() {
   );
 
   fn esc2(i: &[u8]) -> IResult<&[u8], &[u8]> {
-    escaped(digit, '\\', one_of("\"n\\"))(i)
+    escaped(digit, b'\\', one_of(&b"\"n\\"[..]))(i)
   }
   assert_eq!(esc2(&b"12\\nnn34"[..]), Ok((&b"nn34"[..], &b"12\\n"[..])));
+
+  // Escapes containing invalid UTF-8 sequences
+  // https://github.com/rust-bakery/nom/issues/1679
+  fn esc3(i: &[u8]) -> IResult<&[u8], &[u8]> {
+    escaped(digit, 0xDB, one_of(&b"\xDB\xDC\xDD"[..]))(i)
+  }
+  assert_eq!(
+    esc3(&b"12\xDB\xDC34;"[..]),
+    Ok((b";".as_slice(), b"12\xDB\xDC34".as_slice()))
+  );
+  assert_eq!(
+    esc3(&b"12\xDC34;"[..]),
+    Ok((b"\xDC34;".as_slice(), b"12".as_slice()))
+  );
+  assert_eq!(
+    esc3(&b"12\xDB\xDC\xDC\xDC34"[..]),
+    Ok((b"\xDC\xDC34".as_slice(), b"12\xDB\xDC".as_slice()))
+  );
 }
 
 #[cfg(feature = "alloc")]
@@ -147,17 +165,17 @@ fn to_s(i: Vec<u8>) -> String {
 #[cfg(feature = "alloc")]
 #[test]
 fn escape_transform() {
-  use crate::Parser;
+  use crate::{bytes::complete::is_not, Parser};
 
   fn esc(i: &[u8]) -> IResult<&[u8], String> {
     map(
       escaped_transform(
         alpha,
-        '\\',
+        b'\\',
         alt((
-          value(&b"\\"[..], tag("\\")),
-          value(&b"\""[..], tag("\"")),
-          value(&b"\n"[..], tag("n")),
+          value(b"\\".as_slice(), tag(b"\\".as_slice())),
+          value(b"\"".as_slice(), tag(b"\"".as_slice())),
+          value(b"\n".as_slice(), tag(b"n".as_slice())),
         )),
       ),
       to_s,
@@ -199,10 +217,10 @@ fn escape_transform() {
     map(
       escaped_transform(
         alpha,
-        '&',
+        b'&',
         alt((
-          value("è".as_bytes(), tag("egrave;")),
-          value("à".as_bytes(), tag("agrave;")),
+          value("è".as_bytes(), tag(b"egrave;".as_slice())),
+          value("à".as_bytes(), tag(b"agrave;".as_slice())),
         )),
       ),
       to_s,
@@ -217,6 +235,40 @@ fn escape_transform() {
     esc2(&b"ab&egrave;D&agrave;EF;"[..]),
     Ok((&b";"[..], String::from("abèDàEF")))
   );
+
+  const FEND: u8 = 0xC0;
+  const FESC: u8 = 0xDB;
+  const TFEND: u8 = 0xDC;
+  const TFESC: u8 = 0xDD;
+
+  // Escapes containing invalid UTF-8 sequences
+  // https://github.com/rust-bakery/nom/issues/1679
+  fn esc3(i: &[u8]) -> IResult<&[u8], Vec<u8>> {
+    escaped_transform(
+      is_not([FESC].as_slice()),
+      FESC,
+      alt((
+        value(&[FEND][..], tag(&[TFEND][..])),
+        value(&[FESC][..], tag(&[TFESC][..])),
+      )),
+    )(i)
+  }
+
+  assert_eq!(
+    esc3(&[0x61, 0x62, FESC, TFEND, 0x63, 0x64, 0x65]),
+    Ok((&[][..], vec![0x61, 0x62, FEND, 0x63, 0x64, 0x65])),
+  );
+  assert_eq!(
+    esc3(&[0x61, 0x62, 0x63]),
+    Ok((&[][..], vec![0x61, 0x62, 0x63])),
+  );
+  assert_eq!(
+    esc3(&[0x61, FESC, 0x00, TFEND, 0x63, 0x64]),
+    Err(Err::Error(error_position!(
+      &[0x00, TFEND, 0x63, 0x64][..],
+      ErrorKind::Tag
+    ))),
+  );
 }
 
 #[cfg(feature = "std")]

@@ -138,19 +138,26 @@ where
   }
 }
 
-/// Recognizes one character and checks that it satisfies a predicate
+/// Recognizes one character or byte and checks that it satisfies a predicate.
 ///
 /// # Example
 ///
 /// ```
 /// # use nom::{Err, error::{ErrorKind, Error}, Needed, IResult};
 /// # use nom::character::complete::satisfy;
-/// fn parser(i: &str) -> IResult<&str, char> {
+/// fn char_parser(i: &str) -> IResult<&str, char> {
 ///     satisfy(|c| c == 'a' || c == 'b')(i)
 /// }
-/// assert_eq!(parser("abc"), Ok(("bc", 'a')));
-/// assert_eq!(parser("cd"), Err(Err::Error(Error::new("cd", ErrorKind::Satisfy))));
-/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Satisfy))));
+/// assert_eq!(char_parser("abc"), Ok(("bc", 'a')));
+/// assert_eq!(char_parser("cd"), Err(Err::Error(Error::new("cd", ErrorKind::Satisfy))));
+/// assert_eq!(char_parser(""), Err(Err::Error(Error::new("", ErrorKind::Satisfy))));
+///
+/// fn byte_parser(i: &[u8]) -> IResult<&[u8], char> {
+///     satisfy(|c| c == 'a' || c == 'b')(i)
+/// }
+/// assert_eq!(byte_parser(b"abc"), Ok((&b"bc"[..], 'a')));
+/// assert_eq!(byte_parser(b"cd"), Err(Err::Error(Error::new(&b"cd"[..], ErrorKind::Satisfy))));
+/// assert_eq!(byte_parser(b""), Err(Err::Error(Error::new(&b""[..], ErrorKind::Satisfy))));
 /// ```
 pub fn satisfy<F, I, Error: ParseError<I>>(
   predicate: F,
@@ -190,7 +197,7 @@ where
     match (i).iter_elements().next().map(|t| {
       let c = t.as_char();
       let b = (self.predicate)(c);
-      (c, b)
+      (c, t.len(), b)
     }) {
       None => {
         if OM::Incomplete::is_streaming() {
@@ -199,13 +206,13 @@ where
           Err(Err::Error(OM::Error::bind(|| (self.make_error)(i))))
         }
       }
-      Some((_, false)) => Err(Err::Error(OM::Error::bind(|| (self.make_error)(i)))),
-      Some((c, true)) => Ok((i.take_from(c.len()), OM::Output::bind(|| c.as_char()))),
+      Some((_, _, false)) => Err(Err::Error(OM::Error::bind(|| (self.make_error)(i)))),
+      Some((c, len, true)) => Ok((i.take_from(len), OM::Output::bind(|| c))),
     }
   }
 }
 
-/// Recognizes one of the provided characters.
+/// Recognizes one of the provided characters or bytes.
 ///
 /// # Example
 ///
@@ -215,6 +222,10 @@ where
 /// assert_eq!(one_of::<_, _, (&str, ErrorKind)>("abc")("b"), Ok(("", 'b')));
 /// assert_eq!(one_of::<_, _, (&str, ErrorKind)>("a")("bc"), Err(Err::Error(("bc", ErrorKind::OneOf))));
 /// assert_eq!(one_of::<_, _, (&str, ErrorKind)>("a")(""), Err(Err::Error(("", ErrorKind::OneOf))));
+///
+/// assert_eq!(one_of::<_, _, (&[u8], ErrorKind)>(&b"abc"[..])(b"b"), Ok((&b""[..], 'b')));
+/// assert_eq!(one_of::<_, _, (&[u8], ErrorKind)>(&b"a"[..])(b"bc"), Err(Err::Error((&b"bc"[..], ErrorKind::OneOf))));
+/// assert_eq!(one_of::<_, _, (&[u8], ErrorKind)>(&b"a"[..])(b""), Err(Err::Error((&b""[..], ErrorKind::OneOf))));
 /// ```
 pub fn one_of<I, T, Error: ParseError<I>>(list: T) -> impl Parser<I, Output = char, Error = Error>
 where
@@ -223,12 +234,12 @@ where
   T: FindToken<char>,
 {
   Satisfy {
-    predicate: move |c: char| list.find_token(c),
+    predicate: move |c| list.find_token(c),
     make_error: move |i| Error::from_error_kind(i, ErrorKind::OneOf),
   }
 }
 
-//. Recognizes a character that is not in the provided characters.
+/// Recognizes a character or byte that is not in the provided characters or bytes.
 ///
 /// # Example
 ///
@@ -238,6 +249,10 @@ where
 /// assert_eq!(none_of::<_, _, (_, ErrorKind)>("abc")("z"), Ok(("", 'z')));
 /// assert_eq!(none_of::<_, _, (_, ErrorKind)>("ab")("a"), Err(Err::Error(("a", ErrorKind::NoneOf))));
 /// assert_eq!(none_of::<_, _, (_, ErrorKind)>("a")(""), Err(Err::Incomplete(Needed::Unknown)));
+///
+/// assert_eq!(none_of::<_, _, (&[u8], ErrorKind)>(&b"abc"[..])(b"z"), Ok((&b""[..], 'z')));
+/// assert_eq!(none_of::<_, _, (&[u8], ErrorKind)>(&b"ab"[..])(b"a"), Err(Err::Error((&b"a"[..], ErrorKind::NoneOf))));
+/// assert_eq!(none_of::<_, _, (&[u8], ErrorKind)>(&b"a"[..])(b""), Err(Err::Incomplete(Needed::Unknown)));
 /// ```
 pub fn none_of<I, T, Error: ParseError<I>>(list: T) -> impl Parser<I, Output = char, Error = Error>
 where
@@ -246,12 +261,12 @@ where
   T: FindToken<char>,
 {
   Satisfy {
-    predicate: move |c: char| !list.find_token(c),
+    predicate: move |c| !list.find_token(c),
     make_error: move |i| Error::from_error_kind(i, ErrorKind::NoneOf),
   }
 }
 
-// Matches one byte as a character. Note that the input type will
+/// Matches one byte as a character. Note that the input type will
 /// accept a `str`, but not a `&[u8]`, unlike many other nom parsers.
 ///
 /// # Example