[8378] | 1 | // License: GPL. For details, see LICENSE file.
|
---|
[3372] | 2 | package org.openstreetmap.josm.io;
|
---|
| 3 |
|
---|
[5854] | 4 | import java.io.IOException;
|
---|
[3372] | 5 | import java.io.InputStream;
|
---|
| 6 | import java.io.InputStreamReader;
|
---|
| 7 | import java.io.PushbackInputStream;
|
---|
| 8 | import java.io.UnsupportedEncodingException;
|
---|
| 9 |
|
---|
/**
 * Detects the different UTF encodings from byte order mark.
 * <p>
 * Instances are obtained through the static {@code create} factories, which inspect the first bytes
 * of the stream for a byte order mark (BOM), select the matching charset and position the reader
 * right after the BOM.
 * @since 3372
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest byte order mark recognized (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input input stream
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * <p>
     * Recognized byte order marks are those of UTF-8, UTF-32BE, UTF-32LE, UTF-16BE and UTF-16LE,
     * tested in that order. Bytes read while probing that are not part of a BOM are pushed back,
     * so no content is lost.
     * @param input input stream
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case UTF-8 is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        byte[] bom = new byte[MAX_BOM_LENGTH];
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);
        // A single read() may legally return fewer bytes than requested, so keep reading until
        // the probe buffer is full or the stream ends.
        int n = fill(pushbackStream, bom);

        String encoding = defaultEncoding;
        int bomLength = 0;
        // The UTF-32LE signature (FF FE 00 00) starts with the UTF-16LE one (FF FE), hence the
        // 4-byte signatures must be tested before the 2-byte ones.
        if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (startsWith(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back every probed byte that follows the BOM (all of them when there is no BOM).
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }

        if (encoding == null) {
            encoding = "UTF-8";
        }
        return new UTFInputStreamReader(pushbackStream, encoding);
    }

    /**
     * Reads from the stream until the buffer is full or the end of the stream is reached.
     * @param in stream to read from
     * @param buffer destination buffer
     * @return the number of bytes actually stored in the buffer, 0 if the stream is empty
     * @throws IOException if any I/O error occurs
     */
    private static int fill(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count < 0) {
                break;
            }
            total += count;
        }
        return total;
    }

    /**
     * Tells whether the valid part of the buffer begins with the given signature.
     * @param buffer probe buffer
     * @param available number of valid bytes at the start of the buffer
     * @param signature expected leading bytes, as unsigned values
     * @return {@code true} if at least {@code signature.length} bytes are valid and they all match
     */
    private static boolean startsWith(byte[] buffer, int available, int... signature) {
        if (available < signature.length) {
            return false;
        }
        for (int i = 0; i < signature.length; i++) {
            if ((buffer[i] & 0xFF) != signature[i]) {
                return false;
            }
        }
        return true;
    }
}