source: josm/trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java@ 3775

Last change on this file since 3775 was 3674, checked in by bastiK, 13 years ago

Replace Bag by MultiMap.
Both collections are quite similar, but there is a technical difference: Bag is backed by an ArrayList and MultiMap by a LinkedHashSet. So with a Bag you could map a key multiple times to a certain value. I haven't found any usage where this behaviour is required or intended.
Also there could be a certain performance drawback with LinkedHashSet compared to ArrayList. However in the typical validator use case the value collection represents duplicate or crossing ways, so the number of elements is usually 1 or 2 and mostly smaller than the initial capacity (LHS=16, AL=10). (But I could have overlooked some use cases.)

  • Property svn:eol-style set to native
File size: 4.2 KB
Line 
1// License: GPL. See LICENSE file for details.
2package org.openstreetmap.josm.data.validation.tests;
3
4import static org.openstreetmap.josm.tools.I18n.tr;
5
6import java.awt.geom.Point2D;
7import java.util.ArrayList;
8import java.util.HashMap;
9import java.util.List;
10import java.util.Map;
11
12import org.openstreetmap.josm.data.osm.OsmPrimitive;
13import org.openstreetmap.josm.data.osm.Way;
14import org.openstreetmap.josm.data.validation.Severity;
15import org.openstreetmap.josm.data.validation.Test;
16import org.openstreetmap.josm.data.validation.TestError;
17import org.openstreetmap.josm.data.validation.util.ValUtil;
18import org.openstreetmap.josm.gui.progress.ProgressMonitor;
19import org.openstreetmap.josm.tools.MultiMap;
20import org.openstreetmap.josm.tools.Utils;
21
22/**
23 * Checks for similar named ways, symptom of a possible typo. It uses the
24 * Levenshtein distance to check for similarity
25 *
26 * @author frsantos
27 */
28public class SimilarNamedWays extends Test {
29
30 protected static int SIMILAR_NAMED = 701;
31
32 /** All ways, grouped by cells */
33 Map<Point2D,List<Way>> cellWays;
34 /** The already detected errors */
35 MultiMap<Way, Way> errorWays;
36
37 /**
38 * Constructor
39 */
40 public SimilarNamedWays() {
41 super(tr("Similarly named ways")+".",
42 tr("This test checks for ways with similar names that may have been misspelled."));
43 }
44
45 @Override
46 public void startTest(ProgressMonitor monitor) {
47 super.startTest(monitor);
48 cellWays = new HashMap<Point2D,List<Way>>(1000);
49 errorWays = new MultiMap<Way, Way>();
50 }
51
52 @Override
53 public void endTest() {
54 cellWays = null;
55 errorWays = null;
56 super.endTest();
57 }
58
59 @Override
60 public void visit(Way w) {
61 if (!w.isUsable())
62 return;
63
64 String name = w.get("name");
65 if (name == null || name.length() < 6)
66 return;
67
68 List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
69 for (List<Way> ways : theCellWays) {
70 for (Way w2 : ways) {
71 if (errorWays.contains(w, w2) || errorWays.contains(w2, w)) {
72 continue;
73 }
74
75 String name2 = w2.get("name");
76 if (name2 == null || name2.length() < 6) {
77 continue;
78 }
79
80 int levenshteinDistance = getLevenshteinDistance(name, name2);
81 if (0 < levenshteinDistance && levenshteinDistance <= 2) {
82 List<OsmPrimitive> primitives = new ArrayList<OsmPrimitive>();
83 primitives.add(w);
84 primitives.add(w2);
85 errors.add(new TestError(this, Severity.WARNING, tr("Similarly named ways"), SIMILAR_NAMED, primitives));
86 errorWays.put(w, w2);
87 }
88 }
89 ways.add(w);
90 }
91 }
92
93 /**
94 * Compute Levenshtein distance
95 *
96 * @param s First word
97 * @param t Second word
98 * @return The distance between words
99 */
100 public int getLevenshteinDistance(String s, String t) {
101 int d[][]; // matrix
102 int n; // length of s
103 int m; // length of t
104 int i; // iterates through s
105 int j; // iterates through t
106 char s_i; // ith character of s
107 char t_j; // jth character of t
108 int cost; // cost
109
110 // Step 1
111 n = s.length();
112 m = t.length();
113 if (n == 0)
114 return m;
115 if (m == 0)
116 return n;
117 d = new int[n + 1][m + 1];
118
119 // Step 2
120 for (i = 0; i <= n; i++) {
121 d[i][0] = i;
122 }
123 for (j = 0; j <= m; j++) {
124 d[0][j] = j;
125 }
126
127 // Step 3
128 for (i = 1; i <= n; i++) {
129
130 s_i = s.charAt(i - 1);
131
132 // Step 4
133 for (j = 1; j <= m; j++) {
134
135 t_j = t.charAt(j - 1);
136
137 // Step 5
138 if (s_i == t_j) {
139 cost = 0;
140 } else {
141 cost = 1;
142 }
143
144 // Step 6
145 d[i][j] = Utils.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
146 }
147 }
148
149 // Step 7
150 return d[n][m];
151 }
152}
Note: See TracBrowser for help on using the repository browser.