# Changeset 14371 in josm

Ignore:
Timestamp:
2018-10-27T13:34:59+02:00 (20 months ago)
Message:

fix #15889 - add MapCSS function `is_similar`

This function tests if two strings are similar. Logic extracted from SimilarNamedWays validation test.

Location:
trunk
Files:
4 edited

Unmodified
Removed
• ## trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java

 r13980 /** * Compute Levenshtein distance * * @param s First word * @param t Second word * @return The distance between words */ public static int getLevenshteinDistance(String s, String t) { int[][] d; // matrix int n; // length of s int m; // length of t int i; // iterates through s int j; // iterates through t char si; // ith character of s char tj; // jth character of t int cost; // cost // Step 1 n = s.length(); m = t.length(); if (n == 0) return m; if (m == 0) return n; d = new int[n+1][m+1]; // Step 2 for (i = 0; i <= n; i++) { d[i][0] = i; } for (j = 0; j <= m; j++) { d[0][j] = j; } // Step 3 for (i = 1; i <= n; i++) { si = s.charAt(i - 1); // Step 4 for (j = 1; j <= m; j++) { tj = t.charAt(j - 1); // Step 5 if (si == tj) { cost = 0; } else { cost = 1; } // Step 6 d[i][j] = Math.min(Math.min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost); } } // Step 7 return d[n][m]; } /** * Add a regular expression rule. * @param regExpr the regular expression to search for */ public boolean similaryName(String name, String name2) { // check plain strings int distance = getLevenshteinDistance(name, name2); boolean similar = distance > 0 && distance <= 2; // check if only the case differs, so we don't consider large distance as different strings if (distance > 2 && name.length() == name2.length()) { similar = Utils.deAccent(name).equalsIgnoreCase(Utils.deAccent(name2)); } boolean similar = Utils.isSimilar(name, name2); // try all rules for (NormalizeRule rule : rules) { int levenshteinDistance = getLevenshteinDistance(rule.normalize(name), rule.normalize(name2)); int levenshteinDistance = Utils.getLevenshteinDistance(rule.normalize(name), rule.normalize(name2)); if (levenshteinDistance == 0) // one rule results in identical names: identical
• ## trunk/src/org/openstreetmap/josm/gui/mappaint/mapcss/ExpressionFactory.java

 r13811
    /**
     * Tells whether two strings are similar without being identical.
     * <p>
     * This is a thin delegate: the whole decision is made by {@link Utils#isSimilar}.
     * NOTE(review): the snake_case name is presumably the name under which the function is
     * visible to MapCSS (the commit message introduces it as {@code is_similar}), so it must
     * not be renamed to follow Java conventions.
     * @param string1 first string to compare
     * @param string2 second string to compare
     * @return the result of {@code Utils.isSimilar(string1, string2)}
     * @see Utils#isSimilar
     * @since 14371
     */
    public static boolean is_similar(String string1, String string2) {
        final boolean similar = Utils.isSimilar(string1, string2);
        return similar;
    }

    /**
     * Percent-decode a string. (See https://en.wikipedia.org/wiki/Percent-encoding)
     * This is especially useful for wikipedia titles
• ## trunk/src/org/openstreetmap/josm/tools/Utils.java

 r14247 /** * Compute Levenshtein distance * * @param s First word * @param t Second word * @return The distance between words * @since 14371 */ public static int getLevenshteinDistance(String s, String t) { int[][] d; // matrix int n; // length of s int m; // length of t int i; // iterates through s int j; // iterates through t char si; // ith character of s char tj; // jth character of t int cost; // cost // Step 1 n = s.length(); m = t.length(); if (n == 0) return m; if (m == 0) return n; d = new int[n+1][m+1]; // Step 2 for (i = 0; i <= n; i++) { d[i][0] = i; } for (j = 0; j <= m; j++) { d[0][j] = j; } // Step 3 for (i = 1; i <= n; i++) { si = s.charAt(i - 1); // Step 4 for (j = 1; j <= m; j++) { tj = t.charAt(j - 1); // Step 5 if (si == tj) { cost = 0; } else { cost = 1; } // Step 6 d[i][j] = Math.min(Math.min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost); } } // Step 7 return d[n][m]; } /** * Check if two strings are similar, but not identical, i.e., have a Levenshtein distance of 1 or 2. * @param string1 first string to compare * @param string2 second string to compare * @return true if the normalized strings are different but only a "little bit" * @see #getLevenshteinDistance * @since 14371 */ public static boolean isSimilar(String string1, String string2) { // check plain strings int distance = getLevenshteinDistance(string1, string2); // check if only the case differs, so we don't consider large distance as different strings if (distance > 2 && string1.length() == string2.length()) { return deAccent(string1).equalsIgnoreCase(deAccent(string2)); } else { return distance > 0 && distance <= 2; } } /** * A ForkJoinWorkerThread that will always inherit caller permissions, * unlike JDK's InnocuousForkJoinWorkerThread, used if a security manager exists.
• ## trunk/test/unit/org/openstreetmap/josm/tools/UtilsTest.java

 r13520
        assertEquals("Empty on null stream", 0, Utils.readBytesFromStream(null).length);
    }

    /**
     * Test of {@link Utils#getLevenshteinDistance} method.
     */
    @Test
    public void testLevenshteinDistance() {
        assertEquals(0, Utils.getLevenshteinDistance("foo", "foo"));
        assertEquals(3, Utils.getLevenshteinDistance("foo", "bar"));
        assertEquals(1, Utils.getLevenshteinDistance("bar", "baz"));
        // Two substitutions (or one deletion plus one insertion)
        assertEquals(2, Utils.getLevenshteinDistance("bar", "bra"));
        // Empty words: the distance is the length of the other word
        assertEquals(0, Utils.getLevenshteinDistance("", ""));
        assertEquals(3, Utils.getLevenshteinDistance("", "foo"));
        assertEquals(3, Utils.getLevenshteinDistance("foo", ""));
    }

    /**
     * Test of {@link Utils#isSimilar} method.
     */
    @Test
    public void testIsSimilar() {
        // Identical strings are not "similar"
        assertFalse(Utils.isSimilar("foo", "foo"));
        // Distance 3, same length, but not a mere case/accent difference
        assertFalse(Utils.isSimilar("foo", "bar"));
        // Distance 3 with different lengths
        assertFalse(Utils.isSimilar("foo", "foobar"));
        // Distance 1
        assertTrue(Utils.isSimilar("bar", "baz"));
        // Distance 2, the upper bound still considered similar
        assertTrue(Utils.isSimilar("bar", "bra"));
        // Distance greater than 2, but only case and accents differ
        assertTrue(Utils.isSimilar("Rua São João", "Rua SAO Joao"));
    }
}
Note: See TracChangeset for help on using the changeset viewer.