2626import java .io .Closeable ;
2727import java .io .EOFException ;
2828import java .io .IOException ;
29+ import java .nio .charset .Charset ;
30+ import java .nio .charset .StandardCharsets ;
2931
3032/**
3133 * A class to read and parse an xml stream.
@@ -37,14 +39,13 @@ public class XmlReader implements Closeable {
3739
3840 //private static final ByteString LINEFEED_OR_CARRIAGE_RETURN = ByteString.encodeUtf8("\n\r");
3941
40- private static final ByteString UNQUOTED_STRING_TERMINALS
41- = ByteString .encodeUtf8 (" >/=\n " );
42+ private final ByteString unquotedStringTerminals ;
4243
43- private static final ByteString CDATA_CLOSE = ByteString . encodeUtf8 ( "]]>" ) ;
44- private static final ByteString CDATA_OPEN = ByteString . encodeUtf8 ( "<![CDATA[" ) ;
45- private static final ByteString DOCTYPE_OPEN = ByteString . encodeUtf8 ( "<!DOCTYPE" ) ;
46- private static final ByteString COMMENT_CLOSE = ByteString . encodeUtf8 ( "-->" ) ;
47- private static final ByteString XML_DECLARATION_CLOSE = ByteString . encodeUtf8 ( "?>" ) ;
44+ private final ByteString cdataClose ;
45+ private final ByteString cdataOpen ;
46+ private final ByteString doctypeOpen ;
47+ private final ByteString commentClose ;
48+ private final ByteString xmlDeclarationClose ;
4849 private static final ByteString UTF8_BOM = ByteString .of ((byte ) 0xEF , (byte ) 0xBB , (byte ) 0xBF );
4950
5051 private static final byte DOUBLE_QUOTE = '"' ;
@@ -97,21 +98,33 @@ public class XmlReader implements Closeable {
9798
9899 private final BufferedSource source ;
99100 private final Buffer buffer ;
101+ private final Charset charset ;
100102 private String currentElementName ;
101103
102- private XmlReader (BufferedSource source ) {
104+ private XmlReader (BufferedSource source , Charset charset ) {
103105 if (source == null ) {
104106 throw new NullPointerException ("source == null" );
105107 }
106108 this .source = source ;
107109 this .buffer = source .buffer ();
110+ this .charset = charset ;
111+ unquotedStringTerminals = ByteString .encodeString (" >/=\n " , charset );
112+ cdataClose = ByteString .encodeString ("]]>" , charset );
113+ cdataOpen = ByteString .encodeString ("<![CDATA[" , charset );
114+ doctypeOpen = ByteString .encodeString ("<!DOCTYPE" , charset );
115+ commentClose = ByteString .encodeString ("-->" , charset );
116+ xmlDeclarationClose = ByteString .encodeString ("?>" , charset );
108117 }
109118
110119 /**
111120 * Returns a new instance that reads an XML-encoded stream from {@code source}.
112121 */
113122 public static XmlReader of (BufferedSource source ) {
114- return new XmlReader (source );
123+ return new XmlReader (source , StandardCharsets .UTF_8 );
124+ }
125+
126+ public static XmlReader of (BufferedSource source , Charset charset ) {
127+ return new XmlReader (source , charset );
115128 }
116129
117130 /**
@@ -313,7 +326,7 @@ private int doPeek() throws IOException {
313326 * @throws IOException
314327 */
315328 private boolean isCDATA () throws IOException {
316- return fillBuffer (CDATA_OPEN .size ()) && buffer .rangeEquals (0 , CDATA_OPEN );
329+ return fillBuffer (cdataOpen .size ()) && buffer .rangeEquals (0 , cdataOpen );
317330 }
318331
319332 /**
@@ -324,8 +337,8 @@ private boolean isCDATA() throws IOException {
324337 * @throws IOException
325338 */
326339 private boolean isDocTypeDefinition () throws IOException {
327- return buffer .size () >= DOCTYPE_OPEN .size () &&
328- buffer .snapshot (DOCTYPE_OPEN .size ()).toAsciiUppercase ().equals (DOCTYPE_OPEN );
340+ return buffer .size () >= doctypeOpen .size () &&
341+ buffer .snapshot (doctypeOpen .size ()).toAsciiUppercase ().equals (doctypeOpen );
329342 }
330343
331344 /**
@@ -564,14 +577,14 @@ public String nextTextContent() throws IOException {
564577 + "> but haven't found" );
565578 }
566579
567- return buffer .readUtf8 (index );
580+ return buffer .readString (index , charset );
568581 } else if (p == PEEKED_CDATA ) {
569582 peeked = PEEKED_NONE ;
570583
571584 // Search index of closing CDATA tag ]]>
572585 long index = indexOfClosingCDATA ();
573586
574- String result = buffer .readUtf8 (index );
587+ String result = buffer .readString (index , charset );
575588 buffer .skip (3 ); // consume ]]>
576589 return result ;
577590 } else if (p == PEEKED_ELEMENT_END ) {
@@ -673,7 +686,7 @@ public boolean nextTextContentAsBoolean() throws IOException {
673686 * @throws IOException
674687 */
675688 private long indexOfClosingCDATA () throws IOException {
676- long index = source .indexOf (CDATA_CLOSE );
689+ long index = source .indexOf (cdataClose );
677690 if (index == -1 ) {
678691 throw new EOFException ("<![CDATA[ at " + getPath () + " has never been closed with ]]>" );
679692 }
@@ -810,12 +823,12 @@ private int nextNonWhitespace(boolean throwOnEof, boolean isDocumentBeginning) t
810823 int peekStack = stack [stackSize - 1 ];
811824
812825 if (peekStack == XmlScope .NONEMPTY_DOCUMENT && isDocTypeDefinition ()) {
813- long index = source .indexOf (CLOSING_XML_ELEMENT , DOCTYPE_OPEN .size ());
826+ long index = source .indexOf (CLOSING_XML_ELEMENT , doctypeOpen .size ());
814827 if (index == -1 ) {
815828 throw syntaxError ("Unterminated <!DOCTYPE> . Inline DOCTYPE is not support at the moment." );
816829 }
817830 // check if doctype uses brackets
818- long bracketIndex = source .indexOf (OPENING_DOCTYPE_BRACKET , DOCTYPE_OPEN .size (), index );
831+ long bracketIndex = source .indexOf (OPENING_DOCTYPE_BRACKET , doctypeOpen .size (), index );
819832 if (bracketIndex != -1 ) {
820833 index = source .indexOf (ByteString .of (CLOSING_DOCTYPE_BRACKET , CLOSING_XML_ELEMENT ), index + bracketIndex );
821834 if (index == -1 ) {
@@ -829,19 +842,19 @@ private int nextNonWhitespace(boolean throwOnEof, boolean isDocumentBeginning) t
829842 p = 0 ;
830843 continue ;
831844 } else if (peek == '!' && fillBuffer (4 )) {
832- long index = source .indexOf (COMMENT_CLOSE , 4 ); // skip <!-- in comparison by offset 4
845+ long index = source .indexOf (commentClose , 4 ); // skip <!-- in comparison by offset 4
833846 if (index == -1 ) {
834847 throw syntaxError ("Unterminated comment" );
835848 }
836- source .skip (index + COMMENT_CLOSE .size ()); // skip behind -->
849+ source .skip (index + commentClose .size ()); // skip behind -->
837850 p = 0 ;
838851 continue ;
839852 } else if (peek == '?' ) {
840- long index = source .indexOf (XML_DECLARATION_CLOSE , 2 ); // skip <? in comparison by offset 2
853+ long index = source .indexOf (xmlDeclarationClose , 2 ); // skip <? in comparison by offset 2
841854 if (index == -1 ) {
842855 throw syntaxError ("Unterminated xml declaration or processing instruction \" <?\" " );
843856 }
844- source .skip (index + XML_DECLARATION_CLOSE .size ()); // skip behind ?>
857+ source .skip (index + xmlDeclarationClose .size ()); // skip behind ?>
845858 p = 0 ;
846859 continue ;
847860 }
@@ -896,8 +909,8 @@ public String getCurrentElementName() {
896909
897910 /** Returns an unquoted value as a string. */
898911 private String nextUnquotedValue () throws IOException {
899- long i = source .indexOfElement (UNQUOTED_STRING_TERMINALS );
900- return i != -1 ? buffer .readUtf8 ( i ) : buffer .readUtf8 ( );
912+ long i = source .indexOfElement (unquotedStringTerminals );
913+ return i != -1 ? buffer .readString ( i , charset ) : buffer .readString ( charset );
901914 }
902915
903916 /**
@@ -920,19 +933,19 @@ private String nextQuotedValue(byte runTerminator) throws IOException {
920933 // If we've got an escape character, we're going to need a string builder.
921934 if (buffer .getByte (index ) == '\\' ) {
922935 if (builder == null ) builder = new StringBuilder ();
923- builder .append (buffer .readUtf8 (index ));
936+ builder .append (buffer .readString (index , charset ));
924937 buffer .readByte (); // '\'
925938 builder .append (readEscapeCharacter ());
926939 continue ;
927940 }
928941
929942 // If it isn't the escape character, it's the quote. Return the string.
930943 if (builder == null ) {
931- String result = buffer .readUtf8 (index );
944+ String result = buffer .readString (index , charset );
932945 buffer .readByte (); // Consume the quote character.
933946 return result ;
934947 } else {
935- builder .append (buffer .readUtf8 (index ));
948+ builder .append (buffer .readString (index , charset ));
936949 buffer .readByte (); // Consume the quote character.
937950 return builder .toString ();
938951 }
@@ -988,7 +1001,7 @@ private char readEscapeCharacter() throws IOException {
9881001 } else if (c >= 'A' && c <= 'F' ) {
9891002 result += (c - 'A' + 10 );
9901003 } else {
991- throw syntaxError ("\\ u" + buffer .readUtf8 ( 4 ));
1004+ throw syntaxError ("\\ u" + buffer .readString ( 4 , charset ));
9921005 }
9931006 }
9941007 buffer .skip (4 );
0 commit comments