1919BOOST_JSON_NS_BEGIN
2020namespace detail {
2121
// Byte-order-aware constant builders used by the UTF-8 classification table
// and by the masks applied to words filled with std::memcpy.
//
// BOOST_JSON_MK_NUM(kind, len)
//     Builds one 16-bit table entry from a single hex digit `kind` and a
//     two-hex-digit `len`. On little-endian targets the value is 0x<kind><len>;
//     on big-endian targets the two bytes are reversed (0x<len>0<kind>).
//     Always decode an entry with BOOST_JSON_UTF8_KIND / BOOST_JSON_UTF8_LENGTH
//     rather than shifting by hand, so the layout stays private to this block.
//
// BOOST_JSON_MK_NUM2 / 3 / 4(b1, ..., bN)
//     Build a 32-bit mask or pattern for a word whose first N bytes were
//     copied from memory. Every argument must be exactly two hex digits.
//     The LAST argument corresponds to the first byte in memory and `b1`
//     to the N-th byte; bytes of the word beyond N are always masked out.
//
// Arguments are pasted with `##`, so they must be bare hex digits (no `0x`
// prefix, no expressions) and are never macro-expanded.
#ifdef BOOST_JSON_BIG_ENDIAN
// The extra `0` pads the single-digit kind to a full byte (e.g. 0x0201).
# define BOOST_JSON_MK_NUM(b1, b2) 0x ## b2 ## 0 ## b1
# define BOOST_JSON_MK_NUM2(b1, b2) 0x ## b2 ## b1 ## 0000
# define BOOST_JSON_MK_NUM3(b1, b2, b3) 0x ## b3 ## b2 ## b1 ## 00
# define BOOST_JSON_MK_NUM4(b1, b2, b3, b4) 0x ## b4 ## b3 ## b2 ## b1
# define BOOST_JSON_UTF8_KIND(b) ((b) & 0xFF)
# define BOOST_JSON_UTF8_LENGTH(b) ((b) >> 8)
#else
# define BOOST_JSON_MK_NUM(b1, b2) 0x ## b1 ## b2
# define BOOST_JSON_MK_NUM2(b1, b2) 0x ## b1 ## b2
# define BOOST_JSON_MK_NUM3(b1, b2, b3) 0x ## b1 ## b2 ## b3
# define BOOST_JSON_MK_NUM4(b1, b2, b3, b4) 0x ## b1 ## b2 ## b3 ## b4
# define BOOST_JSON_UTF8_KIND(b) ((b) >> 8)
# define BOOST_JSON_UTF8_LENGTH(b) ((b) & 0xFF)
#endif
37+
2238template <int N>
2339std::uint32_t
2440load_little_endian (void const * p)
3854uint16_t
3955classify_utf8 (char c)
4056{
57+ // for little endian
4158 // 0x000 = invalid
4259 // 0x102 = 2 bytes, second byte [80, BF]
4360 // 0x203 = 3 bytes, second byte [A0, BF]
@@ -46,6 +63,7 @@ classify_utf8(char c)
4663 // 0x504 = 4 bytes, second byte [90, BF]
4764 // 0x604 = 4 bytes, second byte [80, BF]
4865 // 0x704 = 4 bytes, second byte [80, 8F]
66+ // for big endian the bytes are reversed
4967 static constexpr uint16_t first[128 ]
5068 {
5169 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 ,
@@ -57,13 +75,41 @@ classify_utf8(char c)
5775 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 ,
5876 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 ,
5977
60- 0x000 , 0x000 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 ,
61- 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 ,
62- 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 ,
63- 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 ,
64- 0x203 , 0x303 , 0x303 , 0x303 , 0x303 , 0x303 , 0x303 , 0x303 ,
65- 0x303 , 0x303 , 0x303 , 0x303 , 0x303 , 0x403 , 0x303 , 0x303 ,
66- 0x504 , 0x604 , 0x604 , 0x604 , 0x704 , 0x000 , 0x000 , 0x000 ,
78+ BOOST_JSON_MK_NUM (0 , 00 ), BOOST_JSON_MK_NUM (0 , 00 ),
79+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
80+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
81+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
82+
83+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
84+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
85+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
86+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
87+
88+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
89+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
90+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
91+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
92+
93+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
94+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
95+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
96+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
97+
98+ BOOST_JSON_MK_NUM (2 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
99+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
100+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
101+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
102+
103+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
104+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
105+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (4 , 03 ),
106+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
107+
108+ BOOST_JSON_MK_NUM (5 , 04 ), BOOST_JSON_MK_NUM (6 , 04 ),
109+ BOOST_JSON_MK_NUM (6 , 04 ), BOOST_JSON_MK_NUM (6 , 04 ),
110+ BOOST_JSON_MK_NUM (7 , 04 ), BOOST_JSON_MK_NUM (0 , 00 ),
111+ BOOST_JSON_MK_NUM (0 , 00 ), BOOST_JSON_MK_NUM (0 , 00 ),
112+
67113 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 ,
68114 };
69115 return first[static_cast <unsigned char >(c & 0x7F )];
@@ -74,30 +120,33 @@ bool
74120is_valid_utf8 (const char * p, uint16_t first)
75121{
76122 uint32_t v;
77- switch (first >> 8 )
123+ switch (BOOST_JSON_UTF8_KIND ( first) )
78124 {
79125 default :
80126 return false ;
81127
82128 // 2 bytes, second byte [80, BF]
83129 case 1 :
84- v = load_little_endian< 2 >(p );
85- return (v & 0xC000 ) == 0x8000 ;
130+ std::memcpy (&v, p, 2 );
131+ return (v & BOOST_JSON_MK_NUM2 (C0, 00 )) == BOOST_JSON_MK_NUM2 ( 80 , 00 ) ;
86132
87133 // 3 bytes, second byte [A0, BF]
88134 case 2 :
89- v = load_little_endian<3 >(p);
90- return (v & 0xC0E000 ) == 0x80A000 ;
135+ std::memcpy (&v, p, 3 );
136+ return (v & BOOST_JSON_MK_NUM3 (C0,E0 ,00 ))
137+ == BOOST_JSON_MK_NUM3 (80 ,A0,00 );
91138
92139 // 3 bytes, second byte [80, BF]
93140 case 3 :
94- v = load_little_endian<3 >(p);
95- return (v & 0xC0C000 ) == 0x808000 ;
141+ std::memcpy (&v, p, 3 );
142+ return (v & BOOST_JSON_MK_NUM3 (C0,C0,00 ))
143+ == BOOST_JSON_MK_NUM3 (80 ,80 ,00 );
96144
97145 // 3 bytes, second byte [80, 9F]
98146 case 4 :
99- v = load_little_endian<3 >(p);
100- return (v & 0xC0E000 ) == 0x808000 ;
147+ std::memcpy (&v, p, 3 );
148+ return (v & BOOST_JSON_MK_NUM3 (C0,E0 ,00 ))
149+ == BOOST_JSON_MK_NUM3 (80 ,80 ,00 );
101150
102151 // 4 bytes, second byte [90, BF]
103152 case 5 :
@@ -106,13 +155,15 @@ is_valid_utf8(const char* p, uint16_t first)
106155
107156 // 4 bytes, second byte [80, BF]
108157 case 6 :
109- v = load_little_endian<4 >(p);
110- return (v & 0xC0C0C000 ) == 0x80808000 ;
158+ std::memcpy (&v, p, 4 );
159+ return (v & BOOST_JSON_MK_NUM4 (C0,C0,C0,00 ))
160+ == BOOST_JSON_MK_NUM4 (80 ,80 ,80 ,00 );
111161
112162 // 4 bytes, second byte [80, 8F]
113163 case 7 :
114- v = load_little_endian<4 >(p);
115- return (v & 0xC0C0F000 ) == 0x80808000 ;
164+ std::memcpy (&v, p, 4 );
165+ return (v & BOOST_JSON_MK_NUM4 (C0,C0,F0,00 ))
166+ == BOOST_JSON_MK_NUM4 (80 ,80 ,80 ,00 );
116167 }
117168}
118169
0 commit comments