1 | // License: GPL. For details, see LICENSE file.
|
---|
2 | package org.openstreetmap.josm.io;
|
---|
3 |
|
---|
4 | import java.io.IOException;
|
---|
5 | import java.io.InputStream;
|
---|
6 | import java.io.InputStreamReader;
|
---|
7 | import java.io.PushbackInputStream;
|
---|
8 | import java.io.UnsupportedEncodingException;
|
---|
9 |
|
---|
/**
 * Detects the different UTF encodings from the byte order mark (BOM).
 * <p>
 * Instances are obtained through the {@code create} factory methods, which inspect
 * up to the first four bytes of the stream, pick the matching UTF charset and position
 * the reader right after the BOM.
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Size in bytes of the longest BOM recognized (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input The stream to read from
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if reading the first bytes of the stream fails
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * @param input The stream to read from
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case
     *                        the platform default charset is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws UnsupportedEncodingException if the selected encoding is not supported
     * @throws IOException if reading the first bytes of the stream fails
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        byte[] bom = new byte[MAX_BOM_LENGTH];
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);

        // A single read() may legally return fewer bytes than requested even when more
        // are coming, so keep reading until the buffer is full or the stream ends.
        int n = 0;
        while (n < bom.length) {
            int count = pushbackStream.read(bom, n, bom.length - n);
            if (count <= 0) {
                break;
            }
            n += count;
        }

        String encoding = defaultEncoding;
        int bomLength = 0;
        // The UTF-32LE signature starts with the UTF-16LE one, so it must be tested first.
        // Every test is bounded by n so that the zero padding of a short stream is never
        // mistaken for BOM bytes.
        if (hasPrefix(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (hasPrefix(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (hasPrefix(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (hasPrefix(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (hasPrefix(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back everything that was read but is not part of the BOM.
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }

        if (encoding == null) {
            return new UTFInputStreamReader(pushbackStream);
        } else {
            return new UTFInputStreamReader(pushbackStream, encoding);
        }
    }

    /**
     * Tests whether the first bytes of a buffer match a signature.
     * @param buffer The bytes read so far
     * @param length Number of valid bytes in {@code buffer}
     * @param signature Expected leading bytes, given as unsigned values (0-255)
     * @return {@code true} if at least {@code signature.length} bytes are valid and all of them match
     */
    private static boolean hasPrefix(byte[] buffer, int length, int... signature) {
        if (length < signature.length) {
            return false;
        }
        for (int i = 0; i < signature.length; i++) {
            if ((buffer[i] & 0xFF) != signature[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Creates a reader that decodes with the platform default charset.
     * @param in The stream to read from, positioned after any BOM
     */
    private UTFInputStreamReader(InputStream in) {
        super(in);
    }

    /**
     * Creates a reader that decodes with the named charset.
     * @param in The stream to read from, positioned after any BOM
     * @param cs Name of the charset to decode with
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }
}
|
---|