[5854] | 1 | // License: GPL. For details, see LICENSE file.
|
---|
| 2 | package org.openstreetmap.josm.io;
|
---|
| 3 |
|
---|
| 4 | import java.io.IOException;
|
---|
[6080] | 5 | import java.io.Reader;
|
---|
[5854] | 6 |
|
---|
| 7 | import org.openstreetmap.josm.Main;
|
---|
| 8 |
|
---|
| 9 | /**
|
---|
| 10 | * FilterInputStream that gets rid of characters that are invalid in an XML 1.0
|
---|
| 11 | * document.
|
---|
| 12 | *
|
---|
| 13 | * Although these characters are forbidden, in the real wold they still appear
|
---|
| 14 | * in XML files. Java's SAX parser throws an exception, so we have to filter
|
---|
| 15 | * at a lower level.
|
---|
| 16 | *
|
---|
| 17 | * Only handles control characters (<0x20). Invalid characters are replaced
|
---|
| 18 | * by space (0x20).
|
---|
| 19 | */
|
---|
[6080] | 20 | public class InvalidXmlCharacterFilter extends Reader {
|
---|
[5854] | 21 |
|
---|
[9078] | 22 | private final Reader reader;
|
---|
[6080] | 23 |
|
---|
[8882] | 24 | private static boolean firstWarning = true;
|
---|
[5854] | 25 |
|
---|
[8882] | 26 | private static final boolean[] INVALID_CHARS;
|
---|
[6070] | 27 |
|
---|
[5854] | 28 | static {
|
---|
| 29 | INVALID_CHARS = new boolean[0x20];
|
---|
| 30 | for (int i = 0; i < INVALID_CHARS.length; ++i) {
|
---|
| 31 | INVALID_CHARS[i] = true;
|
---|
| 32 | }
|
---|
| 33 | INVALID_CHARS[0x9] = false; // tab
|
---|
| 34 | INVALID_CHARS[0xA] = false; // LF
|
---|
| 35 | INVALID_CHARS[0xD] = false; // CR
|
---|
| 36 | }
|
---|
| 37 |
|
---|
[6787] | 38 | /**
|
---|
| 39 | * Constructs a new {@code InvalidXmlCharacterFilter} for the given Reader.
|
---|
| 40 | * @param reader The reader to filter
|
---|
| 41 | */
|
---|
[6080] | 42 | public InvalidXmlCharacterFilter(Reader reader) {
|
---|
| 43 | this.reader = reader;
|
---|
[5854] | 44 | }
|
---|
| 45 |
|
---|
| 46 | @Override
|
---|
[6080] | 47 | public int read(char[] b, int off, int len) throws IOException {
|
---|
| 48 | int n = reader.read(b, off, len);
|
---|
[5854] | 49 | if (n == -1) {
|
---|
| 50 | return -1;
|
---|
| 51 | }
|
---|
[5855] | 52 | for (int i = off; i < off + n; ++i) {
|
---|
[5854] | 53 | b[i] = filter(b[i]);
|
---|
| 54 | }
|
---|
| 55 | return n;
|
---|
| 56 | }
|
---|
| 57 |
|
---|
[6080] | 58 | @Override
|
---|
| 59 | public void close() throws IOException {
|
---|
| 60 | reader.close();
|
---|
| 61 | }
|
---|
| 62 |
|
---|
[8870] | 63 | private static char filter(char in) {
|
---|
[8387] | 64 | if (in < 0x20 && INVALID_CHARS[in]) {
|
---|
[5854] | 65 | if (firstWarning) {
|
---|
[6787] | 66 | Main.warn("Invalid xml character encountered: '"+in+"'.");
|
---|
[5854] | 67 | firstWarning = false;
|
---|
| 68 | }
|
---|
| 69 | return 0x20;
|
---|
| 70 | }
|
---|
| 71 | return in;
|
---|
| 72 | }
|
---|
| 73 | }
|
---|