- Timestamp:
- 2018-10-27T13:34:59+02:00 (6 years ago)
- Location:
- trunk/src/org/openstreetmap/josm
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java
r13980 r14371 111 111 112 112 /** 113 * Compute Levenshtein distance114 *115 * @param s First word116 * @param t Second word117 * @return The distance between words118 */119 public static int getLevenshteinDistance(String s, String t) {120 int[][] d; // matrix121 int n; // length of s122 int m; // length of t123 int i; // iterates through s124 int j; // iterates through t125 char si; // ith character of s126 char tj; // jth character of t127 int cost; // cost128 129 // Step 1130 n = s.length();131 m = t.length();132 if (n == 0)133 return m;134 if (m == 0)135 return n;136 d = new int[n+1][m+1];137 138 // Step 2139 for (i = 0; i <= n; i++) {140 d[i][0] = i;141 }142 for (j = 0; j <= m; j++) {143 d[0][j] = j;144 }145 146 // Step 3147 for (i = 1; i <= n; i++) {148 149 si = s.charAt(i - 1);150 151 // Step 4152 for (j = 1; j <= m; j++) {153 154 tj = t.charAt(j - 1);155 156 // Step 5157 if (si == tj) {158 cost = 0;159 } else {160 cost = 1;161 }162 163 // Step 6164 d[i][j] = Math.min(Math.min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);165 }166 }167 168 // Step 7169 return d[n][m];170 }171 172 /**173 113 * Add a regular expression rule. 174 114 * @param regExpr the regular expression to search for … … 200 140 */ 201 141 public boolean similaryName(String name, String name2) { 202 // check plain strings 203 int distance = getLevenshteinDistance(name, name2); 204 boolean similar = distance > 0 && distance <= 2; 205 206 // check if only the case differs, so we don't consider large distance as different strings 207 if (distance > 2 && name.length() == name2.length()) { 208 similar = Utils.deAccent(name).equalsIgnoreCase(Utils.deAccent(name2)); 209 } 142 boolean similar = Utils.isSimilar(name, name2); 210 143 211 144 // try all rules 212 145 for (NormalizeRule rule : rules) { 213 int levenshteinDistance = getLevenshteinDistance(rule.normalize(name), rule.normalize(name2));146 int levenshteinDistance = Utils.getLevenshteinDistance(rule.normalize(name), rule.normalize(name2)); 214 147 if (levenshteinDistance == 0) 215 148 // one rule results in identical names: identical -
trunk/src/org/openstreetmap/josm/gui/mappaint/mapcss/ExpressionFactory.java
r13811 r14371 877 877 878 878 /** 879 * Check if two strings are similar, but not identical, i.e., have a Levenshtein distance of 1 or 2. 880 * @param string1 first string to compare 881 * @param string2 second string to compare 882 * @return true if the normalized strings are different but only a "little bit" 883 * @see Utils#isSimilar 884 * @since 14371 885 */ 886 public static boolean is_similar(String string1, String string2) { 887 return Utils.isSimilar(string1, string2); 888 } 889 890 /** 879 891 * Percent-decode a string. (See https://en.wikipedia.org/wiki/Percent-encoding) 880 892 * This is especially useful for wikipedia titles -
trunk/src/org/openstreetmap/josm/tools/Utils.java
r14247 r14371 1220 1220 1221 1221 /** 1222 * Compute <a href="https://en.wikipedia.org/wiki/Levenshtein_distance">Levenshtein distance</a> 1223 * 1224 * @param s First word 1225 * @param t Second word 1226 * @return The distance between words 1227 * @since 14371 1228 */ 1229 public static int getLevenshteinDistance(String s, String t) { 1230 int[][] d; // matrix 1231 int n; // length of s 1232 int m; // length of t 1233 int i; // iterates through s 1234 int j; // iterates through t 1235 char si; // ith character of s 1236 char tj; // jth character of t 1237 int cost; // cost 1238 1239 // Step 1 1240 n = s.length(); 1241 m = t.length(); 1242 if (n == 0) 1243 return m; 1244 if (m == 0) 1245 return n; 1246 d = new int[n+1][m+1]; 1247 1248 // Step 2 1249 for (i = 0; i <= n; i++) { 1250 d[i][0] = i; 1251 } 1252 for (j = 0; j <= m; j++) { 1253 d[0][j] = j; 1254 } 1255 1256 // Step 3 1257 for (i = 1; i <= n; i++) { 1258 1259 si = s.charAt(i - 1); 1260 1261 // Step 4 1262 for (j = 1; j <= m; j++) { 1263 1264 tj = t.charAt(j - 1); 1265 1266 // Step 5 1267 if (si == tj) { 1268 cost = 0; 1269 } else { 1270 cost = 1; 1271 } 1272 1273 // Step 6 1274 d[i][j] = Math.min(Math.min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost); 1275 } 1276 } 1277 1278 // Step 7 1279 return d[n][m]; 1280 } 1281 1282 /** 1283 * Check if two strings are similar, but not identical, i.e., have a Levenshtein distance of 1 or 2. 1284 * @param string1 first string to compare 1285 * @param string2 second string to compare 1286 * @return true if the normalized strings are different but only a "little bit" 1287 * @see #getLevenshteinDistance 1288 * @since 14371 1289 */ 1290 public static boolean isSimilar(String string1, String string2) { 1291 // check plain strings 1292 int distance = getLevenshteinDistance(string1, string2); 1293 1294 // check if only the case differs, so we don't consider large distance as different strings 1295 if (distance > 2 && string1.length() == string2.length()) { 1296 return deAccent(string1).equalsIgnoreCase(deAccent(string2)); 1297 } else { 1298 return distance > 0 && distance <= 2; 1299 } 1300 } 1301 1302 /** 1222 1303 * A ForkJoinWorkerThread that will always inherit caller permissions, 1223 1304 * unlike JDK's InnocuousForkJoinWorkerThread, used if a security manager exists.
Note:
See TracChangeset
for help on using the changeset viewer.