Skip to content

Commit 3b4e77c

Browse files
committed
Support charsets other than UTF-8
1 parent 2b0a191 commit 3b4e77c

File tree

8 files changed

+222
-68
lines changed

8 files changed

+222
-68
lines changed

core/src/main/java/com/tickaroo/tikxml/TikXml.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import com.tickaroo.tikxml.typeadapter.TypeAdapter;
2222
import java.io.IOException;
2323
import java.lang.reflect.Type;
24+
import java.nio.charset.Charset;
25+
2426
import okio.BufferedSink;
2527
import okio.BufferedSource;
2628

@@ -64,6 +66,17 @@ public Builder writeDefaultXmlDeclaration(boolean writeDeclaration) {
6466
return this;
6567
}
6668

69+
/**
70+
* Specify the charset
71+
*
72+
* @param charset character encoding set to use when reading and writing the xml document
73+
* @return The Builder itself
74+
*/
75+
public Builder charset(Charset charset) {
76+
config.charset = charset;
77+
return this;
78+
}
79+
6780
/**
6881
* Adds a type converter for the given class
6982
*
@@ -105,7 +118,7 @@ private TikXml(TikXmlConfig config) {
105118

106119
public <T> T read(BufferedSource source, Type clazz) throws IOException {
107120

108-
XmlReader reader = XmlReader.of(source);
121+
XmlReader reader = XmlReader.of(source, config.charset);
109122

110123
reader.beginElement();
111124
reader.nextElementName(); // We don't care about the name of the root tag
@@ -125,7 +138,7 @@ public <T> void write(BufferedSink sink, T valueToWrite) throws IOException {
125138

126139
public <T> void write(BufferedSink sink, T valueToWrite, Type typeOfValueToWrite) throws IOException {
127140

128-
XmlWriter writer = XmlWriter.of(sink);
141+
XmlWriter writer = XmlWriter.of(sink, config.charset);
129142

130143
TypeAdapter<T> adapter = config.getTypeAdapter(typeOfValueToWrite);
131144
if (config.writeDefaultXmlDeclaration()) {

core/src/main/java/com/tickaroo/tikxml/TikXmlConfig.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import com.tickaroo.tikxml.typeadapter.TypeAdapter;
2222

2323
import java.lang.reflect.Type;
24+
import java.nio.charset.Charset;
25+
import java.nio.charset.StandardCharsets;
2426

2527
/**
2628
* Holds the config for parsing and writing xml via {@link TikXml}
@@ -34,6 +36,7 @@ public final class TikXmlConfig {
3436
TypeConverters typeConverters = new TypeConverters();
3537
TypeAdapters typeAdapters = new TypeAdapters();
3638
boolean writeDefaultXmlDeclaration = true;
39+
Charset charset = StandardCharsets.UTF_8;
3740

3841
TikXmlConfig() {
3942
}
@@ -58,6 +61,15 @@ public boolean writeDefaultXmlDeclaration() {
5861
return writeDefaultXmlDeclaration;
5962
}
6063

64+
/**
65+
* The charset
66+
*
67+
* @return character encoding set to use when reading and writing the xml document
68+
*/
69+
public Charset charset() {
70+
return charset;
71+
}
72+
6173
/**
6274
* Query a {@link TypeConverter} for a given class
6375
*

core/src/main/java/com/tickaroo/tikxml/XmlReader.java

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
import java.io.Closeable;
2727
import java.io.EOFException;
2828
import java.io.IOException;
29+
import java.nio.charset.Charset;
30+
import java.nio.charset.StandardCharsets;
2931

3032
/**
3133
* A class to read and parse an xml stream.
@@ -37,14 +39,13 @@ public class XmlReader implements Closeable {
3739

3840
//private static final ByteString LINEFEED_OR_CARRIAGE_RETURN = ByteString.encodeUtf8("\n\r");
3941

40-
private static final ByteString UNQUOTED_STRING_TERMINALS
41-
= ByteString.encodeUtf8(" >/=\n");
42+
private final ByteString unquotedStringTerminals;
4243

43-
private static final ByteString CDATA_CLOSE = ByteString.encodeUtf8("]]>");
44-
private static final ByteString CDATA_OPEN = ByteString.encodeUtf8("<![CDATA[");
45-
private static final ByteString DOCTYPE_OPEN = ByteString.encodeUtf8("<!DOCTYPE");
46-
private static final ByteString COMMENT_CLOSE = ByteString.encodeUtf8("-->");
47-
private static final ByteString XML_DECLARATION_CLOSE = ByteString.encodeUtf8("?>");
44+
private final ByteString cdataClose;
45+
private final ByteString cdataOpen;
46+
private final ByteString doctypeOpen;
47+
private final ByteString commentClose;
48+
private final ByteString xmlDeclarationClose;
4849
private static final ByteString UTF8_BOM = ByteString.of((byte) 0xEF, (byte) 0xBB, (byte) 0xBF);
4950

5051
private static final byte DOUBLE_QUOTE = '"';
@@ -97,21 +98,33 @@ public class XmlReader implements Closeable {
9798

9899
private final BufferedSource source;
99100
private final Buffer buffer;
101+
private final Charset charset;
100102
private String currentElementName;
101103

102-
private XmlReader(BufferedSource source) {
104+
private XmlReader(BufferedSource source, Charset charset) {
103105
if (source == null) {
104106
throw new NullPointerException("source == null");
105107
}
106108
this.source = source;
107109
this.buffer = source.buffer();
110+
this.charset = charset;
111+
unquotedStringTerminals = ByteString.encodeString(" >/=\n", charset);
112+
cdataClose = ByteString.encodeString("]]>", charset);
113+
cdataOpen = ByteString.encodeString("<![CDATA[", charset);
114+
doctypeOpen = ByteString.encodeString("<!DOCTYPE", charset);
115+
commentClose = ByteString.encodeString("-->", charset);
116+
xmlDeclarationClose = ByteString.encodeString("?>", charset);
108117
}
109118

110119
/**
111120
* Returns a new instance that reads an XML-encoded stream from {@code source}.
112121
*/
113122
public static XmlReader of(BufferedSource source) {
114-
return new XmlReader(source);
123+
return new XmlReader(source, StandardCharsets.UTF_8);
124+
}
125+
126+
public static XmlReader of(BufferedSource source, Charset charset) {
127+
return new XmlReader(source, charset);
115128
}
116129

117130
/**
@@ -313,7 +326,7 @@ private int doPeek() throws IOException {
313326
* @throws IOException
314327
*/
315328
private boolean isCDATA() throws IOException {
316-
return fillBuffer(CDATA_OPEN.size()) && buffer.rangeEquals(0, CDATA_OPEN);
329+
return fillBuffer(cdataOpen.size()) && buffer.rangeEquals(0, cdataOpen);
317330
}
318331

319332
/**
@@ -324,8 +337,8 @@ private boolean isCDATA() throws IOException {
324337
* @throws IOException
325338
*/
326339
private boolean isDocTypeDefinition() throws IOException {
327-
return buffer.size() >= DOCTYPE_OPEN.size() &&
328-
buffer.snapshot(DOCTYPE_OPEN.size()).toAsciiUppercase().equals(DOCTYPE_OPEN);
340+
return buffer.size() >= doctypeOpen.size() &&
341+
buffer.snapshot(doctypeOpen.size()).toAsciiUppercase().equals(doctypeOpen);
329342
}
330343

331344
/**
@@ -564,14 +577,14 @@ public String nextTextContent() throws IOException {
564577
+ "> but haven't found");
565578
}
566579

567-
return buffer.readUtf8(index);
580+
return buffer.readString(index, charset);
568581
} else if (p == PEEKED_CDATA) {
569582
peeked = PEEKED_NONE;
570583

571584
// Search index of closing CDATA tag ]]>
572585
long index = indexOfClosingCDATA();
573586

574-
String result = buffer.readUtf8(index);
587+
String result = buffer.readString(index, charset);
575588
buffer.skip(3); // consume ]]>
576589
return result;
577590
} else if (p == PEEKED_ELEMENT_END) {
@@ -673,7 +686,7 @@ public boolean nextTextContentAsBoolean() throws IOException {
673686
* @throws IOException
674687
*/
675688
private long indexOfClosingCDATA() throws IOException {
676-
long index = source.indexOf(CDATA_CLOSE);
689+
long index = source.indexOf(cdataClose);
677690
if (index == -1) {
678691
throw new EOFException("<![CDATA[ at " + getPath() + " has never been closed with ]]>");
679692
}
@@ -810,12 +823,12 @@ private int nextNonWhitespace(boolean throwOnEof, boolean isDocumentBeginning) t
810823
int peekStack = stack[stackSize - 1];
811824

812825
if (peekStack == XmlScope.NONEMPTY_DOCUMENT && isDocTypeDefinition()) {
813-
long index = source.indexOf(CLOSING_XML_ELEMENT, DOCTYPE_OPEN.size());
826+
long index = source.indexOf(CLOSING_XML_ELEMENT, doctypeOpen.size());
814827
if (index == -1) {
815828
throw syntaxError("Unterminated <!DOCTYPE> . Inline DOCTYPE is not support at the moment.");
816829
}
817830
// check if doctype uses brackets
818-
long bracketIndex = source.indexOf(OPENING_DOCTYPE_BRACKET, DOCTYPE_OPEN.size(), index);
831+
long bracketIndex = source.indexOf(OPENING_DOCTYPE_BRACKET, doctypeOpen.size(), index);
819832
if (bracketIndex != -1) {
820833
index = source.indexOf(ByteString.of(CLOSING_DOCTYPE_BRACKET, CLOSING_XML_ELEMENT), index + bracketIndex);
821834
if (index == -1) {
@@ -829,19 +842,19 @@ private int nextNonWhitespace(boolean throwOnEof, boolean isDocumentBeginning) t
829842
p = 0;
830843
continue;
831844
} else if (peek == '!' && fillBuffer(4)) {
832-
long index = source.indexOf(COMMENT_CLOSE, 4); // skip <!-- in comparison by offset 4
845+
long index = source.indexOf(commentClose, 4); // skip <!-- in comparison by offset 4
833846
if (index == -1) {
834847
throw syntaxError("Unterminated comment");
835848
}
836-
source.skip(index + COMMENT_CLOSE.size()); // skip behind --!>
849+
source.skip(index + commentClose.size()); // skip behind --!>
837850
p = 0;
838851
continue;
839852
} else if (peek == '?') {
840-
long index = source.indexOf(XML_DECLARATION_CLOSE, 2); // skip <? in comparison by offset 2
853+
long index = source.indexOf(xmlDeclarationClose, 2); // skip <? in comparison by offset 2
841854
if (index == -1) {
842855
throw syntaxError("Unterminated xml declaration or processing instruction \"<?\"");
843856
}
844-
source.skip(index + XML_DECLARATION_CLOSE.size()); // skip behind ?>
857+
source.skip(index + xmlDeclarationClose.size()); // skip behind ?>
845858
p = 0;
846859
continue;
847860
}
@@ -896,8 +909,8 @@ public String getCurrentElementName() {
896909

897910
/** Returns an unquoted value as a string. */
898911
private String nextUnquotedValue() throws IOException {
899-
long i = source.indexOfElement(UNQUOTED_STRING_TERMINALS);
900-
return i != -1 ? buffer.readUtf8(i) : buffer.readUtf8();
912+
long i = source.indexOfElement(unquotedStringTerminals);
913+
return i != -1 ? buffer.readString(i, charset) : buffer.readString(charset);
901914
}
902915

903916
/**
@@ -920,19 +933,19 @@ private String nextQuotedValue(byte runTerminator) throws IOException {
920933
// If we've got an escape character, we're going to need a string builder.
921934
if (buffer.getByte(index) == '\\') {
922935
if (builder == null) builder = new StringBuilder();
923-
builder.append(buffer.readUtf8(index));
936+
builder.append(buffer.readString(index, charset));
924937
buffer.readByte(); // '\'
925938
builder.append(readEscapeCharacter());
926939
continue;
927940
}
928941

929942
// If it isn't the escape character, it's the quote. Return the string.
930943
if (builder == null) {
931-
String result = buffer.readUtf8(index);
944+
String result = buffer.readString(index, charset);
932945
buffer.readByte(); // Consume the quote character.
933946
return result;
934947
} else {
935-
builder.append(buffer.readUtf8(index));
948+
builder.append(buffer.readString(index, charset));
936949
buffer.readByte(); // Consume the quote character.
937950
return builder.toString();
938951
}
@@ -988,7 +1001,7 @@ private char readEscapeCharacter() throws IOException {
9881001
} else if (c >= 'A' && c <= 'F') {
9891002
result += (c - 'A' + 10);
9901003
} else {
991-
throw syntaxError("\\u" + buffer.readUtf8(4));
1004+
throw syntaxError("\\u" + buffer.readString(4, charset));
9921005
}
9931006
}
9941007
buffer.skip(4);

0 commit comments

Comments
 (0)