Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/bytes/complete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ where
///
pub fn escaped<'a, I, Error, F, G>(
normal: F,
control_char: char,
control_char: impl crate::traits::AsChar,
escapable: G,
) -> impl FnMut(I) -> IResult<I, I, Error>
where
Expand Down Expand Up @@ -467,7 +467,7 @@ where
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn escaped_transform<I, Error, F, G, O1, O2, ExtendItem, Output>(
normal: F,
control_char: char,
control_char: impl crate::traits::AsChar,
transform: G,
) -> impl FnMut(I) -> IResult<I, Output, Error>
where
Expand Down
28 changes: 15 additions & 13 deletions src/bytes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ where
///
pub fn escaped<I, Error, F, G>(
normal: F,
control_char: char,
control_char: impl AsChar,
escapable: G,
) -> impl Parser<I, Output = I, Error = Error>
where
Expand All @@ -744,17 +744,18 @@ where
}

/// Parser implementation for [escaped]
pub struct Escaped<F, G, E> {
pub struct Escaped<F, G, C, E> {
normal: F,
escapable: G,
control_char: char,
control_char: C,
e: PhantomData<E>,
}

impl<I, Error: ParseError<I>, F, G> Parser<I> for Escaped<F, G, Error>
impl<I, Error: ParseError<I>, F, G, C> Parser<I> for Escaped<F, G, C, Error>
where
I: Input + Clone + crate::traits::Offset,
<I as Input>::Item: crate::traits::AsChar,
C: crate::traits::AsChar,
F: Parser<I, Error = Error>,
G: Parser<I, Error = Error>,
Error: ParseError<I>,
Expand Down Expand Up @@ -798,8 +799,8 @@ where
}
Err(Err::Error(_)) => {
// unwrap() should be safe here since index < $i.input_len()
if i.iter_elements().next().unwrap().as_char() == self.control_char {
let next = self.control_char.len_utf8();
if i.iter_elements().next().unwrap().as_char() == self.control_char.as_char() {
let next = self.control_char.len();
if next >= i.input_len() {
if OM::Incomplete::is_streaming() {
return Err(Err::Incomplete(Needed::new(1)));
Expand Down Expand Up @@ -900,7 +901,7 @@ where
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn escaped_transform<I, Error, F, G, ExtendItem, Output>(
normal: F,
control_char: char,
control_char: impl AsChar,
transform: G,
) -> impl Parser<I, Output = Output, Error = Error>
where
Expand All @@ -924,23 +925,24 @@ where
}

/// Parser implementation for [escaped_transform]
pub struct EscapedTransform<F, G, E, ExtendItem, Output> {
pub struct EscapedTransform<F, G, C, E, ExtendItem, Output> {
normal: F,
transform: G,
control_char: char,
control_char: C,
e: PhantomData<E>,
extend: PhantomData<ExtendItem>,
o: PhantomData<Output>,
}

impl<I, Error: ParseError<I>, F, G, ExtendItem, Output> Parser<I>
for EscapedTransform<F, G, Error, ExtendItem, Output>
impl<I, Error: ParseError<I>, F, G, C, ExtendItem, Output> Parser<I>
for EscapedTransform<F, G, C, Error, ExtendItem, Output>
where
I: Clone + crate::traits::Offset + Input,
I: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
<F as Parser<I>>::Output: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
<G as Parser<I>>::Output: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
<I as Input>::Item: crate::traits::AsChar,
C: crate::traits::AsChar,
F: Parser<I, Error = Error>,
G: Parser<I, Error = Error>,
Error: ParseError<I>,
Expand Down Expand Up @@ -979,8 +981,8 @@ where
}
Err(Err::Error(_)) => {
// unwrap() should be safe here since index < $i.input_len()
if remainder.iter_elements().next().unwrap().as_char() == self.control_char {
let next = index + self.control_char.len_utf8();
if remainder.iter_elements().next().unwrap().as_char() == self.control_char.as_char() {
let next = index + self.control_char.len();
let input_len = input.input_len();

if next >= input_len {
Expand Down
4 changes: 2 additions & 2 deletions src/bytes/streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ where
///
pub fn escaped<I, Error, F, G>(
normal: F,
control_char: char,
control_char: impl crate::traits::AsChar,
escapable: G,
) -> impl FnMut(I) -> IResult<I, I, Error>
where
Expand Down Expand Up @@ -480,7 +480,7 @@ where
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn escaped_transform<I, Error, F, G, O1, O2, ExtendItem, Output>(
normal: F,
control_char: char,
control_char: impl crate::traits::AsChar,
transform: G,
) -> impl FnMut(I) -> IResult<I, Output, Error>
where
Expand Down
72 changes: 62 additions & 10 deletions src/bytes/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ fn escaping() {
use crate::character::streaming::one_of;

fn esc(i: &[u8]) -> IResult<&[u8], &[u8]> {
escaped(alpha, '\\', one_of("\"n\\"))(i)
escaped(alpha, b'\\', one_of(b"\"n\\".as_slice()))(i)
}
assert_eq!(esc(&b"abcd;"[..]), Ok((&b";"[..], &b"abcd"[..])));
assert_eq!(esc(&b"ab\\\"cd;"[..]), Ok((&b";"[..], &b"ab\\\"cd"[..])));
Expand All @@ -97,9 +97,27 @@ fn escaping() {
);

fn esc2(i: &[u8]) -> IResult<&[u8], &[u8]> {
escaped(digit, '\\', one_of("\"n\\"))(i)
escaped(digit, b'\\', one_of(&b"\"n\\"[..]))(i)
}
assert_eq!(esc2(&b"12\\nnn34"[..]), Ok((&b"nn34"[..], &b"12\\n"[..])));

// Escapes containing invalid UTF-8 sequences
// https://github.com/rust-bakery/nom/issues/1679
// Test helper: runs `escaped` over raw bytes with the integer literal 0xDB as
// the control character (instead of a `char`), `digit` as the "normal"
// parser, and the bytes 0xDB, 0xDC and 0xDD as the set accepted after the
// control character.
fn esc3(i: &[u8]) -> IResult<&[u8], &[u8]> {
escaped(digit, 0xDB, one_of(&b"\xDB\xDC\xDD"[..]))(i)
}
assert_eq!(
esc3(&b"12\xDB\xDC34;"[..]),
Ok((b";".as_slice(), b"12\xDB\xDC34".as_slice()))
);
assert_eq!(
esc3(&b"12\xDC34;"[..]),
Ok((b"\xDC34;".as_slice(), b"12".as_slice()))
);
assert_eq!(
esc3(&b"12\xDB\xDC\xDC\xDC34"[..]),
Ok((b"\xDC\xDC34".as_slice(), b"12\xDB\xDC".as_slice()))
);
}

#[cfg(feature = "alloc")]
Expand Down Expand Up @@ -147,17 +165,17 @@ fn to_s(i: Vec<u8>) -> String {
#[cfg(feature = "alloc")]
#[test]
fn escape_transform() {
use crate::Parser;
use crate::{bytes::complete::is_not, Parser};

fn esc(i: &[u8]) -> IResult<&[u8], String> {
map(
escaped_transform(
alpha,
'\\',
b'\\',
alt((
value(&b"\\"[..], tag("\\")),
value(&b"\""[..], tag("\"")),
value(&b"\n"[..], tag("n")),
value(b"\\".as_slice(), tag(b"\\".as_slice())),
value(b"\"".as_slice(), tag(b"\"".as_slice())),
value(b"\n".as_slice(), tag(b"n".as_slice())),
)),
),
to_s,
Expand Down Expand Up @@ -199,10 +217,10 @@ fn escape_transform() {
map(
escaped_transform(
alpha,
'&',
b'&',
alt((
value("è".as_bytes(), tag("egrave;")),
value("à".as_bytes(), tag("agrave;")),
value("è".as_bytes(), tag(b"egrave;".as_slice())),
value("à".as_bytes(), tag(b"agrave;".as_slice())),
)),
),
to_s,
Expand All @@ -217,6 +235,40 @@ fn escape_transform() {
esc2(&b"ab&egrave;D&agrave;EF;"[..]),
Ok((&b";"[..], String::from("abèDàEF")))
);

// Byte values used by the `esc3` helper in this test.
// NOTE(review): the names and values look like the KISS/SLIP framing bytes
// (frame end, frame escape, and their transposed forms) — confirm.
// Byte produced when `TFEND` follows `FESC`.
const FEND: u8 = 0xC0;
// Control (escape) byte passed to `escaped_transform`.
const FESC: u8 = 0xDB;
// Byte that, after `FESC`, is transformed into `FEND`.
const TFEND: u8 = 0xDC;
// Byte that, after `FESC`, is transformed into `FESC`.
const TFESC: u8 = 0xDD;

// Escapes containing invalid UTF-8 sequences
// https://github.com/rust-bakery/nom/issues/1679
// Test helper: un-escapes a byte slice with `escaped_transform`, using the
// byte `FESC` as the control character and collecting the result in a
// `Vec<u8>`.
fn esc3(i: &[u8]) -> IResult<&[u8], Vec<u8>> {
escaped_transform(
// "normal" parser; presumably consumes bytes up to the next `FESC`
// (see nom's `is_not`) — confirm.
is_not([FESC].as_slice()),
// Control character given as a byte rather than a `char`.
FESC,
// What may follow the control character, and what it is replaced with.
alt((
// FESC TFEND => FEND
value(&[FEND][..], tag(&[TFEND][..])),
// FESC TFESC => FESC
value(&[FESC][..], tag(&[TFESC][..])),
)),
)(i)
}

assert_eq!(
esc3(&[0x61, 0x62, FESC, TFEND, 0x63, 0x64, 0x65]),
Ok((&[][..], vec![0x61, 0x62, FEND, 0x63, 0x64, 0x65])),
);
assert_eq!(
esc3(&[0x61, 0x62, 0x63]),
Ok((&[][..], vec![0x61, 0x62, 0x63])),
);
assert_eq!(
esc3(&[0x61, FESC, 0x00, TFEND, 0x63, 0x64]),
Err(Err::Error(error_position!(
&[0x00, TFEND, 0x63, 0x64][..],
ErrorKind::Tag
))),
);
}

#[cfg(feature = "std")]
Expand Down
41 changes: 28 additions & 13 deletions src/character/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,19 +138,26 @@ where
}
}

/// Recognizes one character and checks that it satisfies a predicate
/// Recognizes one character or byte and checks that it satisfies a predicate.
///
/// # Example
///
/// ```
/// # use nom::{Err, error::{ErrorKind, Error}, Needed, IResult};
/// # use nom::character::complete::satisfy;
/// fn parser(i: &str) -> IResult<&str, char> {
/// fn char_parser(i: &str) -> IResult<&str, char> {
/// satisfy(|c| c == 'a' || c == 'b')(i)
/// }
/// assert_eq!(parser("abc"), Ok(("bc", 'a')));
/// assert_eq!(parser("cd"), Err(Err::Error(Error::new("cd", ErrorKind::Satisfy))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Satisfy))));
/// assert_eq!(char_parser("abc"), Ok(("bc", 'a')));
/// assert_eq!(char_parser("cd"), Err(Err::Error(Error::new("cd", ErrorKind::Satisfy))));
/// assert_eq!(char_parser(""), Err(Err::Error(Error::new("", ErrorKind::Satisfy))));
///
/// fn byte_parser(i: &[u8]) -> IResult<&[u8], char> {
/// satisfy(|c| c == 'a' || c == 'b')(i)
/// }
/// assert_eq!(byte_parser(b"abc"), Ok((&b"bc"[..], 'a')));
/// assert_eq!(byte_parser(b"cd"), Err(Err::Error(Error::new(&b"cd"[..], ErrorKind::Satisfy))));
/// assert_eq!(byte_parser(b""), Err(Err::Error(Error::new(&b""[..], ErrorKind::Satisfy))));
/// ```
pub fn satisfy<F, I, Error: ParseError<I>>(
predicate: F,
Expand Down Expand Up @@ -190,7 +197,7 @@ where
match (i).iter_elements().next().map(|t| {
let c = t.as_char();
let b = (self.predicate)(c);
(c, b)
(c, t.len(), b)
}) {
None => {
if OM::Incomplete::is_streaming() {
Expand All @@ -199,13 +206,13 @@ where
Err(Err::Error(OM::Error::bind(|| (self.make_error)(i))))
}
}
Some((_, false)) => Err(Err::Error(OM::Error::bind(|| (self.make_error)(i)))),
Some((c, true)) => Ok((i.take_from(c.len()), OM::Output::bind(|| c.as_char()))),
Some((_, _, false)) => Err(Err::Error(OM::Error::bind(|| (self.make_error)(i)))),
Some((c, len, true)) => Ok((i.take_from(len), OM::Output::bind(|| c))),
}
}
}

/// Recognizes one of the provided characters.
/// Recognizes one of the provided characters or bytes.
///
/// # Example
///
Expand All @@ -215,6 +222,10 @@ where
/// assert_eq!(one_of::<_, _, (&str, ErrorKind)>("abc")("b"), Ok(("", 'b')));
/// assert_eq!(one_of::<_, _, (&str, ErrorKind)>("a")("bc"), Err(Err::Error(("bc", ErrorKind::OneOf))));
/// assert_eq!(one_of::<_, _, (&str, ErrorKind)>("a")(""), Err(Err::Error(("", ErrorKind::OneOf))));
///
/// assert_eq!(one_of::<_, _, (&[u8], ErrorKind)>(&b"abc"[..])(b"b"), Ok((&b""[..], 'b')));
/// assert_eq!(one_of::<_, _, (&[u8], ErrorKind)>(&b"a"[..])(b"bc"), Err(Err::Error((&b"bc"[..], ErrorKind::OneOf))));
/// assert_eq!(one_of::<_, _, (&[u8], ErrorKind)>(&b"a"[..])(b""), Err(Err::Error((&b""[..], ErrorKind::OneOf))));
/// ```
pub fn one_of<I, T, Error: ParseError<I>>(list: T) -> impl Parser<I, Output = char, Error = Error>
where
Expand All @@ -223,12 +234,12 @@ where
T: FindToken<char>,
{
Satisfy {
predicate: move |c: char| list.find_token(c),
predicate: move |c| list.find_token(c),
make_error: move |i| Error::from_error_kind(i, ErrorKind::OneOf),
}
}

//. Recognizes a character that is not in the provided characters.
/// Recognizes a character or byte that is not in the provided characters or bytes.
///
/// # Example
///
Expand All @@ -238,6 +249,10 @@ where
/// assert_eq!(none_of::<_, _, (_, ErrorKind)>("abc")("z"), Ok(("", 'z')));
/// assert_eq!(none_of::<_, _, (_, ErrorKind)>("ab")("a"), Err(Err::Error(("a", ErrorKind::NoneOf))));
/// assert_eq!(none_of::<_, _, (_, ErrorKind)>("a")(""), Err(Err::Incomplete(Needed::Unknown)));
///
/// assert_eq!(none_of::<_, _, (&[u8], ErrorKind)>(&b"abc"[..])(b"z"), Ok((&b""[..], 'z')));
/// assert_eq!(none_of::<_, _, (&[u8], ErrorKind)>(&b"ab"[..])(b"a"), Err(Err::Error((&b"a"[..], ErrorKind::NoneOf))));
/// assert_eq!(none_of::<_, _, (&[u8], ErrorKind)>(&b"a"[..])(b""), Err(Err::Incomplete(Needed::Unknown)));
/// ```
pub fn none_of<I, T, Error: ParseError<I>>(list: T) -> impl Parser<I, Output = char, Error = Error>
where
Expand All @@ -246,12 +261,12 @@ where
T: FindToken<char>,
{
Satisfy {
predicate: move |c: char| !list.find_token(c),
predicate: move |c| !list.find_token(c),
make_error: move |i| Error::from_error_kind(i, ErrorKind::NoneOf),
}
}

// Matches one byte as a character. Note that the input type will
/// Matches one byte as a character. Note that the input type will
/// accept a `str`, but not a `&[u8]`, unlike many other nom parsers.
///
/// # Example
Expand Down
Loading