1 | // License: GPL. For details, see LICENSE file.
|
---|
2 | package org.openstreetmap.josm.io;
|
---|
3 |
|
---|
4 | import java.io.IOException;
|
---|
5 | import java.io.InputStream;
|
---|
6 | import java.io.InputStreamReader;
|
---|
7 | import java.io.PushbackInputStream;
|
---|
8 | import java.io.UnsupportedEncodingException;
|
---|
9 |
|
---|
/**
 * An {@link InputStreamReader} that detects the UTF encoding of a stream from its
 * byte order mark (BOM) and starts decoding right after that mark.
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest BOM that is recognized (the UTF-32 ones). */
    private static final int MAX_BOM_LENGTH = 4;

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input The stream to read from. Its first bytes are inspected for a BOM.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * <p>
     * Up to four leading bytes are consumed to look for a UTF-8, UTF-16 (BE/LE) or
     * UTF-32 (BE/LE) byte order mark. Bytes that do not belong to a recognized BOM are
     * pushed back, so the returned reader sees them as regular content.
     * @param input The stream to read from. Its first bytes are inspected for a BOM.
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case
     *        the single-argument {@link InputStreamReader} constructor picks the charset.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);
        byte[] bom = new byte[MAX_BOM_LENGTH];
        // A single read() may return fewer bytes than requested even though more are
        // pending, so keep reading until the buffer is full or the stream ends.
        int n = readLeadingBytes(pushbackStream, bom);

        String encoding = defaultEncoding;
        int bomLength = 0;
        // The UTF-32LE mark (FF FE 00 00) begins with the UTF-16LE mark (FF FE), so the
        // 4-byte marks must be tested before the 2-byte ones.
        if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (startsWith(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back everything that was read beyond the BOM (or all of it without a BOM).
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }

        if (encoding == null) {
            return new UTFInputStreamReader(pushbackStream);
        } else {
            return new UTFInputStreamReader(pushbackStream, encoding);
        }
    }

    /**
     * Fills the buffer from the stream, tolerating short reads.
     * @param in The stream to read from
     * @param buffer The buffer to fill from index 0
     * @return The number of bytes actually stored, between 0 and {@code buffer.length}
     * @throws IOException if any I/O error occurs
     */
    private static int readLeadingBytes(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            // -1 signals end of stream; also stop on 0 so that a stream breaking the
            // read() contract cannot make this loop spin forever.
            if (count <= 0) {
                break;
            }
            total += count;
        }
        return total;
    }

    /**
     * Tells whether the valid part of the buffer starts with the given byte signature.
     * Only the first {@code length} bytes are considered, so leftover zeros in the
     * buffer can never be mistaken for stream content.
     * @param buffer The buffer holding the leading bytes of the stream
     * @param length The number of valid bytes in the buffer
     * @param signature The expected bytes, given as unsigned values (0x00-0xFF)
     * @return {@code true} if enough bytes are available and all of them match
     */
    private static boolean startsWith(byte[] buffer, int length, int... signature) {
        if (length < signature.length) {
            return false;
        }
        for (int i = 0; i < signature.length; i++) {
            if (buffer[i] != (byte) signature[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Creates a reader through the single-argument {@link InputStreamReader} constructor.
     * @param in The stream to decode
     */
    private UTFInputStreamReader(InputStream in) {
        super(in);
    }

    /**
     * Creates a reader that decodes with the named charset.
     * @param in The stream to decode
     * @param cs The name of the charset to use
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }
}
|
---|