[8378] | 1 | // License: GPL. For details, see LICENSE file.
|
---|
[3372] | 2 | package org.openstreetmap.josm.io;
|
---|
| 3 |
|
---|
[5854] | 4 | import java.io.IOException;
|
---|
[3372] | 5 | import java.io.InputStream;
|
---|
| 6 | import java.io.InputStreamReader;
|
---|
| 7 | import java.io.PushbackInputStream;
|
---|
| 8 | import java.io.UnsupportedEncodingException;
|
---|
[11553] | 9 | import java.util.Optional;
|
---|
[3372] | 10 |
|
---|
/**
 * Detects the different UTF encodings from byte order mark.
 * <p>
 * The first bytes of the wrapped stream are inspected for a UTF-8, UTF-16 (BE/LE) or
 * UTF-32 (BE/LE) byte order mark (BOM). A recognized BOM selects the charset and is skipped;
 * otherwise all inspected bytes are pushed back and the caller-supplied default charset is used.
 * @since 3372
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest recognized byte order mark (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input input stream
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * @param input input stream
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case UTF-8 is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        byte[] bom = new byte[MAX_BOM_LENGTH];
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);
        int n = readUpTo(pushbackStream, bom);

        String encoding = defaultEncoding;
        int bomLength = 0;
        // The 4-byte UTF-32LE mark (FF FE 00 00) starts with the UTF-16LE mark (FF FE),
        // so the UTF-32 signatures have to be tested before the UTF-16 ones.
        if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (startsWith(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back every inspected byte that is not part of the BOM.
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }
        return new UTFInputStreamReader(pushbackStream, Optional.ofNullable(encoding).orElse("UTF-8"));
    }

    /**
     * Fills {@code buffer} from {@code in}, stopping early only at end of stream.
     * A single {@code read} call may legally return fewer bytes than requested, hence the loop.
     * @param in stream to read from
     * @param buffer destination buffer
     * @return number of bytes actually stored in {@code buffer} (0 if the stream is empty)
     * @throws IOException if any I/O error occurs
     */
    private static int readUpTo(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count <= 0) {
                // -1 is end of stream; 0 would violate the InputStream contract, stop instead of spinning.
                break;
            }
            total += count;
        }
        return total;
    }

    /**
     * Tests whether the first {@code length} valid bytes of {@code buffer} begin with {@code signature}.
     * Bytes beyond {@code length} were never read and are not considered.
     * @param buffer probe buffer
     * @param length number of valid bytes in {@code buffer}
     * @param signature expected unsigned byte values
     * @return {@code true} if enough bytes are available and all of them match
     */
    private static boolean startsWith(byte[] buffer, int length, int... signature) {
        if (length < signature.length) {
            return false;
        }
        for (int i = 0; i < signature.length; i++) {
            if ((buffer[i] & 0xFF) != signature[i]) {
                return false;
            }
        }
        return true;
    }
}
|
---|