1 | // License: GPL. For details, see LICENSE file.
|
---|
2 | package org.openstreetmap.josm.io;
|
---|
3 |
|
---|
4 | import java.io.FilterInputStream;
|
---|
5 | import java.io.IOException;
|
---|
6 | import java.io.InputStream;
|
---|
7 |
|
---|
8 | import org.openstreetmap.josm.Main;
|
---|
9 |
|
---|
10 | /**
|
---|
11 | * FilterInputStream that gets rid of characters that are invalid in an XML 1.0
|
---|
12 | * document.
|
---|
13 | *
|
---|
14 | * Although these characters are forbidden, in the real world they still appear
|
---|
15 | * in XML files. Java's SAX parser throws an exception, so we have to filter
|
---|
16 | * at a lower level.
|
---|
17 | *
|
---|
18 | * Only handles control characters (<0x20). Invalid characters are replaced
|
---|
19 | * by space (0x20).
|
---|
20 | */
|
---|
21 | public class InvalidXmlCharacterFilter extends FilterInputStream {
|
---|
22 |
|
---|
23 | public static boolean firstWarning = true;
|
---|
24 |
|
---|
25 | public static final boolean[] INVALID_CHARS;
|
---|
26 |
|
---|
27 | static {
|
---|
28 | INVALID_CHARS = new boolean[0x20];
|
---|
29 | for (int i = 0; i < INVALID_CHARS.length; ++i) {
|
---|
30 | INVALID_CHARS[i] = true;
|
---|
31 | }
|
---|
32 | INVALID_CHARS[0x9] = false; // tab
|
---|
33 | INVALID_CHARS[0xA] = false; // LF
|
---|
34 | INVALID_CHARS[0xD] = false; // CR
|
---|
35 | }
|
---|
36 |
|
---|
37 | public InvalidXmlCharacterFilter(InputStream in) {
|
---|
38 | super(in);
|
---|
39 | }
|
---|
40 |
|
---|
41 | @Override
|
---|
42 | public int read() throws IOException {
|
---|
43 | return filter((byte)super.read());
|
---|
44 | }
|
---|
45 |
|
---|
46 | @Override
|
---|
47 | public int read(byte[] b, int off, int len) throws IOException {
|
---|
48 | int n = super.read(b, off, len);
|
---|
49 | if (n == -1) {
|
---|
50 | return -1;
|
---|
51 | }
|
---|
52 | for (int i = off; i < off + n; ++i) {
|
---|
53 | b[i] = filter(b[i]);
|
---|
54 | }
|
---|
55 | return n;
|
---|
56 | }
|
---|
57 |
|
---|
58 | private byte filter(byte in) {
|
---|
59 | if (in < 0x20 && in >= 0 && INVALID_CHARS[in]) {
|
---|
60 | if (firstWarning) {
|
---|
61 | Main.warn("Invalid xml character encountered.");
|
---|
62 | firstWarning = false;
|
---|
63 | }
|
---|
64 | return 0x20;
|
---|
65 | }
|
---|
66 | return in;
|
---|
67 | }
|
---|
68 |
|
---|
69 | }
|
---|