1 | // License: GPL. For details, see LICENSE file.
|
---|
2 | package org.openstreetmap.josm.io;
|
---|
3 |
|
---|
4 | import java.io.IOException;
|
---|
5 | import java.io.Reader;
|
---|
6 |
|
---|
7 | import org.openstreetmap.josm.Main;
|
---|
8 |
|
---|
9 | /**
|
---|
10 | * FilterInputStream that gets rid of characters that are invalid in an XML 1.0
|
---|
11 | * document.
|
---|
12 | *
|
---|
13 | * Although these characters are forbidden, in the real wold they still appear
|
---|
14 | * in XML files. Java's SAX parser throws an exception, so we have to filter
|
---|
15 | * at a lower level.
|
---|
16 | *
|
---|
17 | * Only handles control characters (<0x20). Invalid characters are replaced
|
---|
18 | * by space (0x20).
|
---|
19 | */
|
---|
20 | public class InvalidXmlCharacterFilter extends Reader {
|
---|
21 |
|
---|
22 | private final Reader reader;
|
---|
23 |
|
---|
24 | private static boolean firstWarning = true;
|
---|
25 |
|
---|
26 | private static final boolean[] INVALID_CHARS;
|
---|
27 |
|
---|
28 | static {
|
---|
29 | INVALID_CHARS = new boolean[0x20];
|
---|
30 | for (int i = 0; i < INVALID_CHARS.length; ++i) {
|
---|
31 | INVALID_CHARS[i] = true;
|
---|
32 | }
|
---|
33 | INVALID_CHARS[0x9] = false; // tab
|
---|
34 | INVALID_CHARS[0xA] = false; // LF
|
---|
35 | INVALID_CHARS[0xD] = false; // CR
|
---|
36 | }
|
---|
37 |
|
---|
38 | /**
|
---|
39 | * Constructs a new {@code InvalidXmlCharacterFilter} for the given Reader.
|
---|
40 | * @param reader The reader to filter
|
---|
41 | */
|
---|
42 | public InvalidXmlCharacterFilter(Reader reader) {
|
---|
43 | this.reader = reader;
|
---|
44 | }
|
---|
45 |
|
---|
46 | @Override
|
---|
47 | public int read(char[] b, int off, int len) throws IOException {
|
---|
48 | int n = reader.read(b, off, len);
|
---|
49 | if (n == -1) {
|
---|
50 | return -1;
|
---|
51 | }
|
---|
52 | for (int i = off; i < off + n; ++i) {
|
---|
53 | b[i] = filter(b[i]);
|
---|
54 | }
|
---|
55 | return n;
|
---|
56 | }
|
---|
57 |
|
---|
58 | @Override
|
---|
59 | public void close() throws IOException {
|
---|
60 | reader.close();
|
---|
61 | }
|
---|
62 |
|
---|
63 | private static char filter(char in) {
|
---|
64 | if (in < 0x20 && INVALID_CHARS[in]) {
|
---|
65 | if (firstWarning) {
|
---|
66 | Main.warn("Invalid xml character encountered: '"+in+"'.");
|
---|
67 | firstWarning = false;
|
---|
68 | }
|
---|
69 | return 0x20;
|
---|
70 | }
|
---|
71 | return in;
|
---|
72 | }
|
---|
73 | }
|
---|