Skip to content

Commit d75b0c0

Browse files
committed
Support charsets other than UTF-8
1 parent 2b0a191 commit d75b0c0

File tree

8 files changed

+203
-49
lines changed

8 files changed

+203
-49
lines changed

core/src/main/java/com/tickaroo/tikxml/TikXml.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import com.tickaroo.tikxml.typeadapter.TypeAdapter;
2222
import java.io.IOException;
2323
import java.lang.reflect.Type;
24+
import java.nio.charset.Charset;
25+
2426
import okio.BufferedSink;
2527
import okio.BufferedSource;
2628

@@ -64,6 +66,17 @@ public Builder writeDefaultXmlDeclaration(boolean writeDeclaration) {
6466
return this;
6567
}
6668

69+
/**
70+
* Specify the charset
71+
*
72+
* @param charset character encoding set to use when reading and writing the xml document
73+
* @return The Builder itself
74+
*/
75+
public Builder charset(Charset charset) {
76+
config.charset = charset;
77+
return this;
78+
}
79+
6780
/**
6881
* Adds a type converter for the given class
6982
*
@@ -105,7 +118,7 @@ private TikXml(TikXmlConfig config) {
105118

106119
public <T> T read(BufferedSource source, Type clazz) throws IOException {
107120

108-
XmlReader reader = XmlReader.of(source);
121+
XmlReader reader = XmlReader.of(source, config.charset);
109122

110123
reader.beginElement();
111124
reader.nextElementName(); // We don't care about the name of the root tag
@@ -125,7 +138,7 @@ public <T> void write(BufferedSink sink, T valueToWrite) throws IOException {
125138

126139
public <T> void write(BufferedSink sink, T valueToWrite, Type typeOfValueToWrite) throws IOException {
127140

128-
XmlWriter writer = XmlWriter.of(sink);
141+
XmlWriter writer = XmlWriter.of(sink, config.charset);
129142

130143
TypeAdapter<T> adapter = config.getTypeAdapter(typeOfValueToWrite);
131144
if (config.writeDefaultXmlDeclaration()) {

core/src/main/java/com/tickaroo/tikxml/TikXmlConfig.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import com.tickaroo.tikxml.typeadapter.TypeAdapter;
2222

2323
import java.lang.reflect.Type;
24+
import java.nio.charset.Charset;
25+
import java.nio.charset.StandardCharsets;
2426

2527
/**
2628
* Holds the config for parsing and writing xml via {@link TikXml}
@@ -34,6 +36,7 @@ public final class TikXmlConfig {
3436
TypeConverters typeConverters = new TypeConverters();
3537
TypeAdapters typeAdapters = new TypeAdapters();
3638
boolean writeDefaultXmlDeclaration = true;
39+
Charset charset = StandardCharsets.UTF_8;
3740

3841
TikXmlConfig() {
3942
}
@@ -58,6 +61,15 @@ public boolean writeDefaultXmlDeclaration() {
5861
return writeDefaultXmlDeclaration;
5962
}
6063

64+
/**
65+
* The charset
66+
*
67+
* @return character encoding set to use when reading and writing the xml document
68+
*/
69+
public Charset charset() {
70+
return charset;
71+
}
72+
6173
/**
6274
* Query a {@link TypeConverter} for a given class
6375
*

core/src/main/java/com/tickaroo/tikxml/XmlReader.java

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
import java.io.Closeable;
2727
import java.io.EOFException;
2828
import java.io.IOException;
29+
import java.nio.charset.Charset;
30+
import java.nio.charset.StandardCharsets;
2931

3032
/**
3133
* A class to read and parse an xml stream.
@@ -37,14 +39,13 @@ public class XmlReader implements Closeable {
3739

3840
//private static final ByteString LINEFEED_OR_CARRIAGE_RETURN = ByteString.encodeUtf8("\n\r");
3941

40-
private static final ByteString UNQUOTED_STRING_TERMINALS
41-
= ByteString.encodeUtf8(" >/=\n");
42+
private final ByteString UNQUOTED_STRING_TERMINALS;
4243

43-
private static final ByteString CDATA_CLOSE = ByteString.encodeUtf8("]]>");
44-
private static final ByteString CDATA_OPEN = ByteString.encodeUtf8("<![CDATA[");
45-
private static final ByteString DOCTYPE_OPEN = ByteString.encodeUtf8("<!DOCTYPE");
46-
private static final ByteString COMMENT_CLOSE = ByteString.encodeUtf8("-->");
47-
private static final ByteString XML_DECLARATION_CLOSE = ByteString.encodeUtf8("?>");
44+
private final ByteString CDATA_CLOSE;
45+
private final ByteString CDATA_OPEN;
46+
private final ByteString DOCTYPE_OPEN;
47+
private final ByteString COMMENT_CLOSE;
48+
private final ByteString XML_DECLARATION_CLOSE;
4849
private static final ByteString UTF8_BOM = ByteString.of((byte) 0xEF, (byte) 0xBB, (byte) 0xBF);
4950

5051
private static final byte DOUBLE_QUOTE = '"';
@@ -97,21 +98,33 @@ public class XmlReader implements Closeable {
9798

9899
private final BufferedSource source;
99100
private final Buffer buffer;
101+
private final Charset charset;
100102
private String currentElementName;
101103

102-
private XmlReader(BufferedSource source) {
104+
private XmlReader(BufferedSource source, Charset charset) {
103105
if (source == null) {
104106
throw new NullPointerException("source == null");
105107
}
106108
this.source = source;
107109
this.buffer = source.buffer();
110+
this.charset = charset;
111+
UNQUOTED_STRING_TERMINALS = ByteString.encodeString(" >/=\n", charset);
112+
CDATA_CLOSE = ByteString.encodeString("]]>", charset);
113+
CDATA_OPEN = ByteString.encodeString("<![CDATA[", charset);
114+
DOCTYPE_OPEN = ByteString.encodeString("<!DOCTYPE", charset);
115+
COMMENT_CLOSE = ByteString.encodeString("-->", charset);
116+
XML_DECLARATION_CLOSE = ByteString.encodeString("?>", charset);
108117
}
109118

110119
/**
111120
* Returns a new instance that reads an XML-encoded stream from {@code source}.
112121
*/
113122
public static XmlReader of(BufferedSource source) {
114-
return new XmlReader(source);
123+
return new XmlReader(source, StandardCharsets.UTF_8);
124+
}
125+
126+
public static XmlReader of(BufferedSource source, Charset charset) {
127+
return new XmlReader(source, charset);
115128
}
116129

117130
/**
@@ -564,14 +577,14 @@ public String nextTextContent() throws IOException {
564577
+ "> but haven't found");
565578
}
566579

567-
return buffer.readUtf8(index);
580+
return buffer.readString(index, charset);
568581
} else if (p == PEEKED_CDATA) {
569582
peeked = PEEKED_NONE;
570583

571584
// Search index of closing CDATA tag ]]>
572585
long index = indexOfClosingCDATA();
573586

574-
String result = buffer.readUtf8(index);
587+
String result = buffer.readString(index, charset);
575588
buffer.skip(3); // consume ]]>
576589
return result;
577590
} else if (p == PEEKED_ELEMENT_END) {
@@ -897,7 +910,7 @@ public String getCurrentElementName() {
897910
/** Returns an unquoted value as a string. */
898911
private String nextUnquotedValue() throws IOException {
899912
long i = source.indexOfElement(UNQUOTED_STRING_TERMINALS);
900-
return i != -1 ? buffer.readUtf8(i) : buffer.readUtf8();
913+
return i != -1 ? buffer.readString(i, charset) : buffer.readString(charset);
901914
}
902915

903916
/**
@@ -920,19 +933,19 @@ private String nextQuotedValue(byte runTerminator) throws IOException {
920933
// If we've got an escape character, we're going to need a string builder.
921934
if (buffer.getByte(index) == '\\') {
922935
if (builder == null) builder = new StringBuilder();
923-
builder.append(buffer.readUtf8(index));
936+
builder.append(buffer.readString(index, charset));
924937
buffer.readByte(); // '\'
925938
builder.append(readEscapeCharacter());
926939
continue;
927940
}
928941

929942
// If it isn't the escape character, it's the quote. Return the string.
930943
if (builder == null) {
931-
String result = buffer.readUtf8(index);
944+
String result = buffer.readString(index, charset);
932945
buffer.readByte(); // Consume the quote character.
933946
return result;
934947
} else {
935-
builder.append(buffer.readUtf8(index));
948+
builder.append(buffer.readString(index, charset));
936949
buffer.readByte(); // Consume the quote character.
937950
return builder.toString();
938951
}
@@ -988,7 +1001,7 @@ private char readEscapeCharacter() throws IOException {
9881001
} else if (c >= 'A' && c <= 'F') {
9891002
result += (c - 'A' + 10);
9901003
} else {
991-
throw syntaxError("\\u" + buffer.readUtf8(4));
1004+
throw syntaxError("\\u" + buffer.readString(4, charset));
9921005
}
9931006
}
9941007
buffer.skip(4);

core/src/main/java/com/tickaroo/tikxml/XmlWriter.java

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@
1818

1919
package com.tickaroo.tikxml;
2020

21-
import java.io.Closeable;
22-
import java.io.IOException;
2321
import okio.BufferedSink;
2422
import okio.ByteString;
2523

26-
import static com.tickaroo.tikxml.XmlScope.ELEMENT_CONTENT;
27-
import static com.tickaroo.tikxml.XmlScope.ELEMENT_OPENING;
28-
import static com.tickaroo.tikxml.XmlScope.NONEMPTY_DOCUMENT;
29-
import static com.tickaroo.tikxml.XmlScope.getTopStackElementAsToken;
24+
import java.io.Closeable;
25+
import java.io.IOException;
26+
import java.nio.charset.Charset;
27+
import java.nio.charset.StandardCharsets;
28+
29+
import static com.tickaroo.tikxml.XmlScope.*;
3030

3131
/**
3232
* With this class you can write xml with a convenient API.
@@ -75,16 +75,16 @@ public class XmlWriter implements Closeable {
7575
private static final Byte DOUBLE_QUOTE = (byte) '"';
7676
private static final Byte OPENING_XML_ELEMENT = (byte) '<';
7777
private static final Byte CLOSING_XML_ELEMENT = (byte) '>';
78-
private static final ByteString CLOSING_XML_ELEMENT_START = ByteString.encodeUtf8("</");
79-
private static final ByteString INLINE_CLOSING_XML_ELEMENT = ByteString.encodeUtf8("/>");
80-
private static final ByteString ATTRIBUTE_ASSIGNMENT_BEGIN = ByteString.encodeUtf8("=\"");
81-
private static final ByteString OPENING_CDATA = ByteString.encodeUtf8("<![CDATA[");
82-
private static final ByteString CLOSING_CDATA = ByteString.encodeUtf8("]]>");
83-
private static final ByteString XML_DECLARATION =
84-
ByteString.encodeUtf8("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
78+
private final ByteString CLOSING_XML_ELEMENT_START;
79+
private final ByteString INLINE_CLOSING_XML_ELEMENT;
80+
private final ByteString ATTRIBUTE_ASSIGNMENT_BEGIN;
81+
private final ByteString OPENING_CDATA;
82+
private final ByteString CLOSING_CDATA;
83+
private final ByteString XML_DECLARATION;
8584

8685
/** The output data, containing at most one top-level array or object. */
8786
private final BufferedSink sink;
87+
private final Charset charset;
8888
private boolean xmlDeclarationWritten = false;
8989

9090
private int[] stack = new int[32];
@@ -97,18 +97,30 @@ public class XmlWriter implements Closeable {
9797
stack[stackSize++] = XmlScope.EMPTY_DOCUMENT;
9898
}
9999

100-
private XmlWriter(BufferedSink sink) {
100+
private XmlWriter(BufferedSink sink, Charset charset) {
101101
if (sink == null) {
102102
throw new NullPointerException("sink == null");
103103
}
104104
this.sink = sink;
105+
this.charset = charset;
106+
107+
CLOSING_XML_ELEMENT_START = ByteString.encodeString("</", charset);
108+
INLINE_CLOSING_XML_ELEMENT = ByteString.encodeString("/>", charset);
109+
ATTRIBUTE_ASSIGNMENT_BEGIN = ByteString.encodeString("=\"", charset);
110+
OPENING_CDATA = ByteString.encodeString("<![CDATA[", charset);
111+
CLOSING_CDATA = ByteString.encodeString("]]>", charset);
112+
XML_DECLARATION = ByteString.encodeString("<?xml version=\"1.0\" encoding=\"" + charset.name() + "\"?>", charset);
105113
}
106114

107115
/**
108116
* Returns a new instance.
109117
*/
110118
public static XmlWriter of(BufferedSink source) {
111-
return new XmlWriter(source);
119+
return new XmlWriter(source, StandardCharsets.UTF_8);
120+
}
121+
122+
public static XmlWriter of(BufferedSink source, Charset charset) {
123+
return new XmlWriter(source, charset);
112124
}
113125

114126
private void pushStack(int newTop) {
@@ -190,14 +202,14 @@ public XmlWriter beginElement(String elementTagName) throws IOException {
190202
pushStack(XmlScope.ELEMENT_OPENING);
191203
pathNames[stackSize - 1] = elementTagName;
192204
sink.writeByte(OPENING_XML_ELEMENT)
193-
.writeUtf8(elementTagName);
205+
.writeString(elementTagName, charset);
194206
break;
195207

196208
case XmlScope.ELEMENT_CONTENT: // write a nested xml element <parent> Some optional text <nested>
197209
pushStack(XmlScope.ELEMENT_OPENING);
198210
pathNames[stackSize - 1] = elementTagName;
199211
sink.writeByte(OPENING_XML_ELEMENT)
200-
.writeUtf8(elementTagName);
212+
.writeString(elementTagName, charset);
201213
break;
202214

203215
case XmlScope.ELEMENT_OPENING: // write a nested xml element by closing the parent's xml opening header
@@ -206,7 +218,7 @@ public XmlWriter beginElement(String elementTagName) throws IOException {
206218
pathNames[stackSize - 1] = elementTagName;
207219
sink.writeByte(CLOSING_XML_ELEMENT)
208220
.writeByte(OPENING_XML_ELEMENT)
209-
.writeUtf8(elementTagName);
221+
.writeString(elementTagName, charset);
210222
break;
211223

212224
case XmlScope.NONEMPTY_DOCUMENT:
@@ -239,7 +251,7 @@ public XmlWriter endElement() throws IOException {
239251
break;
240252
case XmlScope.ELEMENT_CONTENT:
241253
sink.write(CLOSING_XML_ELEMENT_START)
242-
.writeUtf8(pathNames[stackSize - 1])
254+
.writeString(pathNames[stackSize - 1], charset)
243255
.writeByte(CLOSING_XML_ELEMENT);
244256
popStack();
245257
break;
@@ -273,11 +285,11 @@ public XmlWriter textContent(String textContentValue) throws IOException {
273285
case ELEMENT_OPENING:
274286
sink.writeByte(CLOSING_XML_ELEMENT);
275287
replaceTopOfStack(XmlScope.ELEMENT_CONTENT);
276-
sink.writeUtf8(textContentValue);
288+
sink.writeString(textContentValue, charset);
277289
break;
278290

279291
case ELEMENT_CONTENT:
280-
sink.writeUtf8(textContentValue);
292+
sink.writeString(textContentValue, charset);
281293
break;
282294

283295
default:
@@ -346,13 +358,13 @@ public XmlWriter textContentAsCData(String textContentValue) throws IOException
346358
replaceTopOfStack(XmlScope.ELEMENT_CONTENT);
347359
sink.writeByte(CLOSING_XML_ELEMENT)
348360
.write(OPENING_CDATA)
349-
.writeUtf8(textContentValue)
361+
.writeString(textContentValue, charset)
350362
.write(CLOSING_CDATA);
351363
break;
352364

353365
case ELEMENT_CONTENT:
354366
sink.write(OPENING_CDATA)
355-
.writeUtf8(textContentValue)
367+
.writeString(textContentValue, charset)
356368
.write(CLOSING_CDATA);
357369
break;
358370

@@ -386,9 +398,9 @@ public XmlWriter textContentAsCData(String textContentValue) throws IOException
386398
public XmlWriter attribute(String attributeName, String value) throws IOException {
387399
if (XmlScope.ELEMENT_OPENING == peekStack()) {
388400
sink.writeByte(' ') // Write a whitespace
389-
.writeUtf8(attributeName)
401+
.writeString(attributeName, charset)
390402
.write(ATTRIBUTE_ASSIGNMENT_BEGIN)
391-
.writeUtf8(value)
403+
.writeString(value, charset)
392404
.writeByte(DOUBLE_QUOTE);
393405
} else {
394406
throw syntaxError("Error while trying to write attribute "
@@ -462,14 +474,14 @@ public XmlWriter xmlDeclaration() throws IOException {
462474
sink.write(XML_DECLARATION);
463475
xmlDeclarationWritten = true;
464476
} else {
465-
throw syntaxError("Xml Declatraion "
466-
+ XML_DECLARATION.utf8()
477+
throw syntaxError("Xml Declaration "
478+
+ XML_DECLARATION.string(charset)
467479
+ " can only be written at the beginning of a xml document! You are not at the beginning of a xml document: current xml scope is "
468480
+ XmlScope.getTopStackElementAsToken(stackSize, stack));
469481
}
470482
} else {
471483
throw new IOException("Xml declaration "
472-
+ XML_DECLARATION.utf8()
484+
+ XML_DECLARATION.string(charset)
473485
+ " has already been written in this xml document. Xml declaration can only be written once at the beginning of the document.");
474486
}
475487

0 commit comments

Comments
 (0)