// License: GPL. For details, see LICENSE file. package org.openstreetmap.josm.io; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PushbackInputStream; import java.io.UnsupportedEncodingException; import java.util.Optional; /** * Detects the different UTF encodings from byte order mark. * @since 3372 */ public final class UTFInputStreamReader extends InputStreamReader { private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException { super(in, cs); } /** * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding. * @param input input stream * @return A reader with the correct encoding. Starts to read after the BOM. * @throws IOException if any I/O error occurs * @see #create(java.io.InputStream, String) */ public static UTFInputStreamReader create(InputStream input) throws IOException { return create(input, "UTF-8"); } /** * Creates a new {@link InputStreamReader} from the {@link InputStream}. * @param input input stream * @param defaultEncoding Used, when no BOM was recognized. Can be null. * @return A reader with the correct encoding. Starts to read after the BOM. * @throws IOException if any I/O error occurs */ public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException { byte[] bom = new byte[4]; String encoding = defaultEncoding; int unread; PushbackInputStream pushbackStream = new PushbackInputStream(input, 4); int n = pushbackStream.read(bom, 0, 4); if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { encoding = "UTF-8"; unread = n - 3; } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { encoding = "UTF-32BE"; unread = n - 4; } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { encoding = "UTF-32LE"; unread = n - 4; } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { encoding = "UTF-16BE"; unread = n - 2; } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { encoding = "UTF-16LE"; unread = n - 2; } else { unread = n; } if (unread > 0) { pushbackStream.unread(bom, n - unread, unread); } else if (unread < -1) { pushbackStream.unread(bom, 0, 0); } return new UTFInputStreamReader(pushbackStream, Optional.ofNullable(encoding).orElse("UTF-8")); } }