1 | // License: GPL. For details, see LICENSE file.
|
---|
2 | package org.openstreetmap.josm.io;
|
---|
3 |
|
---|
4 | import java.io.IOException;
|
---|
5 | import java.io.InputStream;
|
---|
6 | import java.io.InputStreamReader;
|
---|
7 | import java.io.PushbackInputStream;
|
---|
8 | import java.io.UnsupportedEncodingException;
|
---|
9 | import java.util.Optional;
|
---|
10 |
|
---|
/**
 * Detects the different UTF encodings from byte order mark.
 * @since 3372
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest byte order mark recognized by {@link #create(InputStream, String)}. */
    private static final int MAX_BOM_LENGTH = 4;

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input input stream
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * <p>
     * Up to four leading bytes are inspected for a UTF-8, UTF-16 (BE/LE) or UTF-32 (BE/LE) byte order mark.
     * Bytes that do not belong to a recognized BOM are pushed back so that the returned reader sees them.
     * @param input input stream
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case UTF-8 is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        byte[] bom = new byte[MAX_BOM_LENGTH];
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);
        int n = readLeadingBytes(pushbackStream, bom);

        String encoding = defaultEncoding;
        int bomLength = 0;
        // Every comparison is guarded by the number of bytes actually read, so that the zero-initialized
        // tail of the buffer can never be mistaken for BOM content.
        // The UTF-32LE mark starts with the UTF-16LE mark and therefore has to be tested first.
        if (n >= 3 && bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB && bom[2] == (byte) 0xBF) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (n >= 4 && bom[0] == (byte) 0x00 && bom[1] == (byte) 0x00 && bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (n >= 4 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE && bom[2] == (byte) 0x00 && bom[3] == (byte) 0x00) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (n >= 2 && bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (n >= 2 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back everything that was read beyond the BOM (or everything, if there is no BOM).
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }
        return new UTFInputStreamReader(pushbackStream, Optional.ofNullable(encoding).orElse("UTF-8"));
    }

    /**
     * Fills {@code buffer} from {@code in}, repeating the read until the buffer is full or the stream ends.
     * A single {@code read} call is allowed to return fewer bytes than requested even if more will follow.
     * @param in stream to read from
     * @param buffer destination buffer
     * @return the number of bytes stored in {@code buffer}, {@code 0} if the stream was already at its end
     * @throws IOException if any I/O error occurs
     */
    private static int readLeadingBytes(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count <= 0) {
                // -1 signals end of stream; 0 is not expected for a non-empty request, stop rather than spin
                break;
            }
            total += count;
        }
        return total;
    }
}
|
---|