Changeset 14948 in josm


Ignore:
Timestamp:
2019-04-01T15:49:19+02:00 (9 months ago)
Author:
Don-vip
Message:

see #17546 - detect uses of "IPA Extensions" Unicode block (U+0250..U+02AF => International Phonetic Alphabet)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/org/openstreetmap/josm/data/validation/tests/TagChecker.java

    r14934 r14948  
      9    9  import java.io.BufferedReader;
     10   10  import java.io.IOException;
          11  import java.lang.Character.UnicodeBlock;
     11   12  import java.util.ArrayList;
     12   13  import java.util.Arrays;
     
    165  166      protected static final int MULTIPLE_SPACES          = 1214;
    166  167      protected static final int MISSPELLED_VALUE_NO_FIX  = 1215;
         168      protected static final int UNUSUAL_UNICODE_CHAR_VALUE = 1216;
    167  169      // CHECKSTYLE.ON: SingleSpaceSeparator
    168  170
     
    381  383       */
    382  384      private static boolean containsNonPrintingControlCharacter(String s) {
    383               if (s == null)
    384                   return false;
    385               for (int i = 0; i < s.length(); i++) {
    386                   char c = s.charAt(i);
    387                   if ((isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c))
    388                       return true;
    389               }
    390               return false;
    391           }
    392
    393           private static boolean isAsciiControlChar(char c) {
         385          return s != null && s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c));
         386      }
         387
         388      private static boolean isAsciiControlChar(int c) {
    394  389          return c < 0x20 || c == 0x7F;
    395  390      }
    396  391
    397           private static boolean isNewLineChar(char c) {
         392      private static boolean isNewLineChar(int c) {
    398  393          return c == 0x0a || c == 0x0d;
    399  394      }
    400  395
    401           private static boolean isBidiControlChar(char c) {
         396      private static boolean isBidiControlChar(int c) {
    402  397          /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
    403  398                             0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
     
    407  402      static String removeNonPrintingControlCharacters(String s) {
    408  403          return NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
         404      }
         405
         406      private static boolean containsUnusualUnicodeCharacter(String s) {
         407          return s != null && s.chars().anyMatch(c -> isUnusualUnicodeBlock(UnicodeBlock.of(c)));
         408      }
         409
         410      private static boolean isUnusualUnicodeBlock(UnicodeBlock b) {
         411          return b == UnicodeBlock.IPA_EXTENSIONS;
    409  412      }
    410  413
     
    545  548                      .build());
    546  549              withErrors.put(p, "ICV");
         550          }
         551          if ((containsUnusualUnicodeCharacter(value)) && !withErrors.contains(p, "UUCV")) {
         552              errors.add(TestError.builder(this, Severity.WARNING, UNUSUAL_UNICODE_CHAR_VALUE)
         553                      .message(tr("Tag value contains unusual Unicode character"), s, key)
         554                      .primitives(p)
         555                      .build());
         556              withErrors.put(p, "UUCV");
    547  557          }
    548  558          if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) {
Note: See TracChangeset for help on using the changeset viewer.