Ignore:
Timestamp:
2017-05-31T03:12:36+02:00 (7 years ago)
Author:
Don-vip
Message:

fix #14858 - "Similarly named ways" test: detect accent and case variations for strings of same length

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java

    r11747 r12283  
    77
    88import java.awt.geom.Point2D;
     9import java.text.Normalizer;
    910import java.util.ArrayList;
    1011import java.util.Arrays;
     
    3435
    3536    protected static final int SIMILAR_NAMED = 701;
     37
     38    private static final Pattern REMOVE_DIACRITICS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    3639
    3740    /** All ways, grouped by cells */
     
    202205        boolean similar = distance > 0 && distance <= 2;
    203206
     207        // check if only case and/or accents differ, so that same-length names with a large distance are still reported as similar
     208        if (distance > 2 && name.length() == name2.length()) {
     209            similar = deAccent(name).equalsIgnoreCase(deAccent(name2));
     210        }
     211
    204212        // try all rules
    205213        for (NormalizeRule rule : rules) {
     
    216224    }
    217225
     226    /**
     227     * Removes diacritics (accents) from string: the input is decomposed to Unicode NFD form and every run of characters from the Combining Diacritical Marks block is deleted.
     228     * @param str string to strip; handed directly to {@code Normalizer.normalize}, which rejects {@code null}
     229     * @return the NFD form of {@code str} without any combining diacritical mark (accent)
     230     * @since 12283
     231     */
     232    public static String deAccent(String str) {
     233        // NFD splits an accented letter into base letter + combining mark(s), which the pattern then strips; see https://stackoverflow.com/a/1215117/2257172
     234        return REMOVE_DIACRITICS.matcher(Normalizer.normalize(str, Normalizer.Form.NFD)).replaceAll("");
     235    }
     236
    218237    @FunctionalInterface
    219238    public interface NormalizeRule {
Note: See TracChangeset for help on using the changeset viewer.