Changeset 12283 in josm


Ignore:
Timestamp:
2017-05-31T03:12:36+02:00 (3 months ago)
Author:
Don-vip
Message:

fix #14858 - "Similarly named ways" test: detect accent and case variations for strings of same length

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java

    r11747 r12283  
    77
    88import java.awt.geom.Point2D;
     9import java.text.Normalizer;
    910import java.util.ArrayList;
    1011import java.util.Arrays;
     
    3435
    3536    protected static final int SIMILAR_NAMED = 701;
     37
     38    private static final Pattern REMOVE_DIACRITICS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    3639
    3740    /** All ways, grouped by cells */
     
    202205        boolean similar = distance > 0 && distance <= 2;
    203206
     207        // check if only case and/or accents differ, so that a large edit distance alone does not make such names count as different strings
     208        if (distance > 2 && name.length() == name2.length()) {
     209            similar = deAccent(name).equalsIgnoreCase(deAccent(name2));
     210        }
     211
    204212        // try all rules
    205213        for (NormalizeRule rule : rules) {
     
    216224    }
    217225
     226    /**
     227     * Removes diacritics (accents) from string: the input is decomposed to Unicode NFD form and every match of {@code REMOVE_DIACRITICS} (runs of combining diacritical marks) is deleted.
     228     * @param str string to strip of diacritics
     229     * @return {@code str} without any diacritic (accent)
     230     * @since 12283
     231     */
     232    public static String deAccent(String str) {
     233        // NFD normalization is applied first so the pattern can match the accents as separate combining marks; see https://stackoverflow.com/a/1215117/2257172
     234        return REMOVE_DIACRITICS.matcher(Normalizer.normalize(str, Normalizer.Form.NFD)).replaceAll("");
     235    }
     236
    218237    @FunctionalInterface
    219238    public interface NormalizeRule {
  • trunk/test/unit/org/openstreetmap/josm/data/validation/tests/SimilarNamedWaysTest.java

    r11403 r12283  
    8888    }
    8989
     90    /**
     91     * Test similar names.
     92     */
    9093    @Test
    9194    public void testSimilarNames() {
     
    118121        checkSimilarity("first and second 2 changes", "First Street", "Soconds Street", true);
    119122        checkSimilarity("first and second 3 changes", "First Street", "Soconds Stret", false);
     123
     124        // accent and/or case differences only, see #14858
     125        checkSimilarity("case only", "Rua São João", "Rua Sao Joao", true);
     126        checkSimilarity("case only", "Rua São João", "Rua SAO JOAO", true);
     127        checkSimilarity("case only", "Rua Sao Joao", "Rua SAO JOAO", true);
     128        checkSimilarity("case only", "Rue éèçàïù", "Rue EeCAIU", true);
    120129    }
    121130}
Note: See TracChangeset for help on using the changeset viewer.