Ticket #7359: extract_levenshtein.patch
File extract_levenshtein.patch, 6.1 KB (added by , 13 years ago) |
---|
-
src/org/openstreetmap/josm/tools/StringMetrics.java
/**
 * Factory and holder for string metric implementations.
 *
 * <p>According to the patch this class lives in
 * {@code src/org/openstreetmap/josm/tools/StringMetrics.java}, i.e. in the
 * package {@code org.openstreetmap.josm.tools}.
 */
public final class StringMetrics {

    /** Utility holder: only the static factory and the nested types are meant to be used. */
    private StringMetrics() {
        // not instantiable
    }

    /**
     * Looks up a string metric by its name.
     *
     * @param name name of the metric; only {@code "levenshtein"} (exact, case-sensitive) is known
     * @return a new instance of the requested metric
     * @throws IllegalArgumentException if {@code name} does not denote a known metric
     */
    public static InterfaceStringMetric getByName(String name) {
        if ("levenshtein".equals(name)) {
            return new LevenshteinStringMetric();
        }
        throw new IllegalArgumentException("Not a valid string metric name");
    }

    /**
     * A measure of how alike two strings are.
     */
    public interface InterfaceStringMetric {
        /**
         * Get string similarity. 1 indicates a perfect match and 0 indicates no match
         *
         * @param string1 First string
         * @param string2 Second string
         * @return the similarity between the strings normalized
         */
        float getSimilarity(String string1, String string2);

        /**
         * Get the raw, metric-specific value for the two strings, without normalisation.
         *
         * @param string1 First string
         * @param string2 Second string
         * @return the un-normalised value computed by the metric
         */
        float getUnNormalisedSimilarity(String string1, String string2);
    }

    /**
     * Compute Levenshtein distance
     */
    public static class LevenshteinStringMetric implements InterfaceStringMetric {

        /**
         * Returns the Levenshtein distance scaled into the range [0, 1]:
         * {@code 1 - distance / max(length1, length2)}.
         *
         * @param string1 First string
         * @param string2 Second string
         * @return 1 for identical strings (including two empty strings), 0 when the
         *         distance equals the length of the longer string
         */
        @Override
        public float getSimilarity(String string1, String string2) {
            final int longest = Math.max(string1.length(), string2.length());
            if (longest == 0) {
                // Two empty strings are a perfect match; also avoids dividing by zero.
                return 1f;
            }
            // The distance never exceeds the longer length, so the result stays within [0, 1].
            return 1f - (float) getLevenshteinDistance(string1, string2) / longest;
        }

        /**
         * Returns the plain Levenshtein distance as a float. Note that, despite the
         * method name, a larger value means the strings are <em>less</em> alike.
         *
         * @param string1 First string
         * @param string2 Second string
         * @return the Levenshtein distance between the two strings
         */
        @Override
        public float getUnNormalisedSimilarity(String string1, String string2) {
            return getLevenshteinDistance(string1, string2);
        }

        /**
         * Compute Levenshtein distance, i.e. the minimum number of single-character
         * insertions, deletions and substitutions needed to turn {@code s} into {@code t}.
         * Characters are compared as {@code char} values.
         *
         * @param s First word
         * @param t Second word
         * @return The distance between words
         */
        public int getLevenshteinDistance(String s, String t) {
            final int n = s.length();
            final int m = t.length();
            if (n == 0) {
                return m;
            }
            if (m == 0) {
                return n;
            }

            // Only the previous and the current row of the distance matrix are ever
            // read, so two rows are enough instead of the full (n + 1) x (m + 1) table.
            int[] previous = new int[m + 1];
            int[] current = new int[m + 1];

            // Row 0: turning the empty prefix of s into the first j characters of t takes j insertions.
            for (int j = 0; j <= m; j++) {
                previous[j] = j;
            }

            for (int i = 1; i <= n; i++) {
                final char sChar = s.charAt(i - 1);
                // Column 0: turning the first i characters of s into the empty prefix takes i deletions.
                current[0] = i;

                for (int j = 1; j <= m; j++) {
                    final int cost = (sChar == t.charAt(j - 1)) ? 0 : 1;
                    final int deletion = previous[j] + 1;
                    final int insertion = current[j - 1] + 1;
                    final int substitution = previous[j - 1] + cost;
                    current[j] = Math.min(Math.min(deletion, insertion), substitution);
                }

                // The row just filled becomes the "previous" row of the next iteration.
                final int[] swap = previous;
                previous = current;
                current = swap;
            }

            // After the final swap the last computed row is held in 'previous'.
            return previous[m];
        }
    }
}
src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java
17 17 import org.openstreetmap.josm.data.validation.util.ValUtil; 18 18 import org.openstreetmap.josm.gui.progress.ProgressMonitor; 19 19 import org.openstreetmap.josm.tools.MultiMap; 20 import org.openstreetmap.josm.tools.Utils; 20 import org.openstreetmap.josm.tools.StringMetrics.InterfaceStringMetric; 21 import org.openstreetmap.josm.tools.StringMetrics.LevenshteinStringMetric; 21 22 22 23 /** 23 24 * Checks for similar named ways, symptom of a possible typo. It uses the … … 28 29 public class SimilarNamedWays extends Test { 29 30 30 31 protected static final int SIMILAR_NAMED = 701; 32 protected static final InterfaceStringMetric metric = new LevenshteinStringMetric(); 31 33 32 34 /** All ways, grouped by cells */ 33 35 Map<Point2D,List<Way>> cellWays; … … 77 79 continue; 78 80 } 79 81 80 int levenshteinDistance = getLevenshteinDistance(name, name2);82 float levenshteinDistance = metric.getUnNormalisedSimilarity(name, name2); 81 83 if (0 < levenshteinDistance && levenshteinDistance <= 2) { 82 84 List<OsmPrimitive> primitives = new ArrayList<OsmPrimitive>(); 83 85 primitives.add(w); … … 89 91 ways.add(w); 90 92 } 91 93 } 92 93 /**94 * Compute Levenshtein distance95 *96 * @param s First word97 * @param t Second word98 * @return The distance between words99 */100 public int getLevenshteinDistance(String s, String t) {101 int d[][]; // matrix102 int n; // length of s103 int m; // length of t104 int i; // iterates through s105 int j; // iterates through t106 char s_i; // ith character of s107 char t_j; // jth character of t108 int cost; // cost109 110 // Step 1111 n = s.length();112 m = t.length();113 if (n == 0)114 return m;115 if (m == 0)116 return n;117 d = new int[n + 1][m + 1];118 119 // Step 2120 for (i = 0; i <= n; i++) {121 d[i][0] = i;122 }123 for (j = 0; j <= m; j++) {124 d[0][j] = j;125 }126 127 // Step 3128 for (i = 1; i <= n; i++) {129 130 s_i = s.charAt(i - 1);131 132 // Step 4133 for (j = 1; j <= m; j++) {134 135 t_j = t.charAt(j - 1);136 137 // Step 
5138 if (s_i == t_j) {139 cost = 0;140 } else {141 cost = 1;142 }143 144 // Step 6145 d[i][j] = Utils.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);146 }147 }148 149 // Step 7150 return d[n][m];151 }152 94 }