Changeset 14933 in josm


Ignore:
Timestamp:
2019-03-26T01:58:41+01:00 (3 weeks ago)
Author:
Don-vip
Message:

fix #15645, fix #17521 - Detect and fix non-printing control characters

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/org/openstreetmap/josm/data/validation/tests/TagChecker.java

    r14897 r14933  
    2020import java.util.Map.Entry;
    2121import java.util.Set;
     22import java.util.regex.Pattern;
    2223
    2324import javax.swing.JCheckBox;
     
    7273    /** often used tags which are not in presets */
    7374    private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>();
     75
     76    private static final Pattern NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(
     77            "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200c-\\u200f\\u202a-\\u202e]");
    7478
    7579    /** The TagChecker data */
     
    372376
    373377    /**
    374      * Checks given string (key or value) if it contains characters with code below 0x20 (either newline or some other special characters)
     378     * Checks whether the given string (key or value) contains non-printing control characters (ASCII control characters other than new lines, or Unicode bidi control characters)
    375379     * @param s string to check
    376      * @return {@code true} if {@code s} contains characters with code below 0x20
    377      */
    378     private static boolean containsLow(String s) {
     380     * @return {@code true} if {@code s} contains non-printing control characters
     381     */
     382    private static boolean containsNonPrintingControlCharacter(String s) {
    379383        if (s == null)
    380384            return false;
    381385        for (int i = 0; i < s.length(); i++) {
    382             if (s.charAt(i) < 0x20)
     386            char c = s.charAt(i);
     387            if ((IsAsciiControlChar(c) && !isNewLineChar(c)) || IsBidiControlChar(c))
    383388                return true;
    384389        }
    385390        return false;
     391    }
     392
     393    private static boolean IsAsciiControlChar(char c) {
     394        return c < 0x20 || c == 0x7F;
     395    }
     396
     397    private static boolean isNewLineChar(char c) {
     398        return c == 0x0a || c == 0x0d;
     399    }
     400
     401    private static boolean IsBidiControlChar(char c) {
     402        /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
     403                           0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
     404        return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e)));
     405    }
     406
     407    static String removeNonPrintingControlCharacters(String s) {
     408        return NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
    386409    }
    387410
     
    515538        if (!checkValues || value == null)
    516539            return;
    517         if ((containsLow(value)) && !withErrors.contains(p, "ICV")) {
     540        if ((containsNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) {
    518541            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE)
    519                     .message(tr("Tag value contains character with code less than 0x20"), s, key)
    520                     .primitives(p)
     542                    .message(tr("Tag value contains non-printing character"), s, key)
     543                    .primitives(p)
     544                    .fix(() -> new ChangePropertyCommand(p, key, removeNonPrintingControlCharacters(value)))
    521545                    .build());
    522546            withErrors.put(p, "ICV");
     
    563587        if (!checkKeys || key == null)
    564588            return;
    565         if ((containsLow(key)) && !withErrors.contains(p, "ICK")) {
     589        if ((containsNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) {
    566590            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY)
    567                     .message(tr("Tag key contains character with code less than 0x20"), s, key)
    568                     .primitives(p)
     591                    .message(tr("Tag key contains non-printing character"), s, key)
     592                    .primitives(p)
     593                    .fix(() -> new ChangePropertyCommand(p, key, removeNonPrintingControlCharacters(key)))
    569594                    .build());
    570595            withErrors.put(p, "ICK");
  • trunk/test/unit/org/openstreetmap/josm/data/validation/tests/TagCheckerTest.java

    r14897 r14933  
    248248    }
    249249
     250    /**
     251     * Unit test of {@link TagChecker#removeNonPrintingControlCharacters}
     252     */
     253    @Test
     254    public void testRemoveUnprintableControlCharacters() {
     255        // Check that the ASCII control characters 0x00-0x1F are removed, except new lines (CR and LF)
     256        for (char c = 0x0; c < 0x20; c++) {
     257            if (c != '\r' && c != '\n') {
     258                assertTrue(TagChecker.removeNonPrintingControlCharacters(Character.toString(c)).isEmpty());
     259            } else {
     260                assertFalse(TagChecker.removeNonPrintingControlCharacters(Character.toString(c)).isEmpty());
     261            }
     262        }
     263        assertTrue(TagChecker.removeNonPrintingControlCharacters(Character.toString((char) 0x7F)).isEmpty());
     264        // Check 9 Unicode bidi control characters are removed
     265        for (char c = 0x200c; c <= 0x200f; c++) {
     266            assertTrue(TagChecker.removeNonPrintingControlCharacters(Character.toString(c)).isEmpty());
     267        }
     268        for (char c = 0x202a; c <= 0x202e; c++) {
     269            assertTrue(TagChecker.removeNonPrintingControlCharacters(Character.toString(c)).isEmpty());
     270        }
     271    }
    250272}
Note: See TracChangeset for help on using the changeset viewer.