@@ -21,58 +21,12 @@ namespace boost {
2121namespace json {
2222namespace detail {
2323
24- template <endian::order = endian::order::little>
25- constexpr
26- std::uint32_t
27- make_u32_impl (std::uint8_t b4, std::uint8_t b3, std::uint8_t b2, std::uint8_t b1)
28- {
29- return (b4 << 24 ) | (b3 << 16 ) | (b2 << 8 ) | b1;
30- }
31-
32- template <>
33- constexpr
34- std::uint32_t
35- make_u32_impl<endian::order::big>(
36- std::uint8_t b4, std::uint8_t b3, std::uint8_t b2, std::uint8_t b1)
37- {
38- return (b1 << 24 ) | (b2 << 16 ) | (b3 << 8 ) | b4;
39- }
40-
41- constexpr
42- std::uint32_t
43- make_u32 (std::uint8_t b4, std::uint8_t b3, std::uint8_t b2, std::uint8_t b1)
44- {
45- return make_u32_impl<endian::order::native>(b4, b3, b2, b1);
46- }
47-
48- template <endian::order = endian::order::little>
49- constexpr
50- bool
51- utf8_case5 (std::uint32_t v)
52- {
53- return ( ( ( v & make_u32 (0xC0 ,0xC0 ,0xF0 ,0x00 ) )
54- + make_u32 (0x7F ,0x7F ,0x70 ,0x00 ) ) | make_u32 (0x00 ,0x00 ,0x30 ,0x00 ) )
55- == make_u32 (0x00 ,0x00 ,0x30 ,0x00 );
56- }
57-
58- template <>
59- constexpr
60- bool
61- utf8_case5<endian::order::big>(std::uint32_t v)
62- {
63- return ( ( ( v & make_u32 (0xC0 ,0xC0 ,0xF0 ,0x00 ) )
64- + make_u32 (0x00 ,0x00 ,0x70 ,0xFF ) ) | make_u32 (0x00 ,0x00 ,0x30 ,0x00 ) )
65- == make_u32 (0x80 ,0x80 ,0x30 ,0x00 );
66- }
67-
6824template <int N>
6925std::uint32_t
7026load_little_endian (void const * p)
7127{
72- std::uint32_t v = 0 ;
73- std::memcpy (&v, p, N);
74- endian::little_to_native_inplace (v);
75- return v;
28+ auto const up = reinterpret_cast <unsigned char const *>(p);
29+ return endian::endian_load<std::uint32_t , N, endian::order::little>(up);
7630}
7731
7832inline
@@ -122,47 +76,49 @@ is_valid_utf8(const char* p, uint16_t first)
12276
12377 // 2 bytes, second byte [80, BF]
12478 case 1 :
125- std::memcpy (&v, p, 2 );
126- return ( v & make_u32 (0x00 ,0x00 ,0xC0 ,0x00 ) )
127- == make_u32 (0x00 ,0x00 ,0x80 ,0x00 );
128-
129- // 3 bytes, second byte [A0, BF]
130- case 2 :
131- std::memcpy (&v, p, 3 );
132- return ( v & make_u32 (0x00 ,0xC0 ,0xE0 ,0x00 ) )
133- == make_u32 (0x00 ,0x80 ,0xA0 ,0x00 );
134-
135- // 3 bytes, second byte [80, BF]
136- case 3 :
137- std::memcpy (&v, p, 3 );
138- return ( v & make_u32 (0x00 ,0xC0 ,0xC0 ,0x00 ) )
139- == make_u32 (0x00 ,0x80 ,0x80 ,0x00 );
140-
141- // 3 bytes, second byte [80, 9F]
142- case 4 :
143- std::memcpy (&v, p, 3 );
144- return ( v & make_u32 (0x00 ,0xC0 ,0xE0 ,0x00 ) )
145- == make_u32 (0x00 ,0x80 ,0x80 ,0x00 );
146-
147- // 4 bytes, second byte [90, BF]
148- case 5 :
149- std::memcpy (&v, p, 4 );
150- return utf8_case5<endian::order::native>(v);
151-
152- // 4 bytes, second byte [80, BF]
153- case 6 :
154- std::memcpy (&v, p, 4 );
155- return ( v & make_u32 (0xC0 ,0xC0 ,0xC0 ,0x00 ) )
156- == make_u32 (0x80 ,0x80 ,0x80 ,0x00 );
157-
158- // 4 bytes, second byte [80, 8F]
159- case 7 :
160- std::memcpy (&v, p, 4 );
161- return ( v & make_u32 (0xC0 ,0xC0 ,0xF0 ,0x00 ) )
162- == make_u32 (0x80 ,0x80 ,0x80 ,0x00 );
79+ v = load_little_endian<2 >(p);
80+ return (v & 0xC000 ) == 0x8000 ;
81+
82+ // 3 bytes, second byte [A0, BF]
83+ case 2 :
84+ v = load_little_endian<3 >(p);
85+ return (v & 0xC0E000 ) == 0x80A000 ;
86+
87+ // 3 bytes, second byte [80, BF]
88+ case 3 :
89+ v = load_little_endian<3 >(p);
90+ return (v & 0xC0C000 ) == 0x808000 ;
91+
92+ // 3 bytes, second byte [80, 9F]
93+ case 4 :
94+ v = load_little_endian<3 >(p);
95+ return (v & 0xC0E000 ) == 0x808000 ;
96+
97+ // 4 bytes, second byte [90, BF]
98+ case 5 :
99+ v = load_little_endian<4 >(p);
100+ return (v & 0xC0C0FF00 ) + 0x7F7F7000 <= 0x2F00 ;
101+
102+ // 4 bytes, second byte [80, BF]
103+ case 6 :
104+ v = load_little_endian<4 >(p);
105+ return (v & 0xC0C0C000 ) == 0x80808000 ;
106+
107+ // 4 bytes, second byte [80, 8F]
108+ case 7 :
109+ v = load_little_endian<4 >(p);
110+ return (v & 0xC0C0F000 ) == 0x80808000 ;
163111 }
164112}
165113
114+ BOOST_NOINLINE
115+ inline
116+ bool
117+ is_valid_utf8_no_inline (const char * p, uint16_t first)
118+ {
119+ return is_valid_utf8 (p, first);
120+ }
121+
166122class utf8_sequence
167123{
168124 char seq_[4 ];
0 commit comments