1 | // License: GPL. See LICENSE file for details.
|
---|
2 | package org.openstreetmap.josm.data.validation.tests;
|
---|
3 |
|
---|
4 | import static org.openstreetmap.josm.tools.I18n.tr;
|
---|
5 |
|
---|
6 | import java.awt.geom.Point2D;
|
---|
7 | import java.util.ArrayList;
|
---|
8 | import java.util.HashMap;
|
---|
9 | import java.util.List;
|
---|
10 | import java.util.Map;
|
---|
11 |
|
---|
12 | import org.openstreetmap.josm.data.osm.OsmPrimitive;
|
---|
13 | import org.openstreetmap.josm.data.osm.Way;
|
---|
14 | import org.openstreetmap.josm.data.validation.Severity;
|
---|
15 | import org.openstreetmap.josm.data.validation.Test;
|
---|
16 | import org.openstreetmap.josm.data.validation.TestError;
|
---|
17 | import org.openstreetmap.josm.data.validation.util.ValUtil;
|
---|
18 | import org.openstreetmap.josm.gui.progress.ProgressMonitor;
|
---|
19 | import org.openstreetmap.josm.tools.MultiMap;
|
---|
20 | import org.openstreetmap.josm.tools.Utils;
|
---|
21 |
|
---|
22 | /**
|
---|
23 | * Checks for similar named ways, symptom of a possible typo. It uses the
|
---|
24 | * Levenshtein distance to check for similarity
|
---|
25 | *
|
---|
26 | * @author frsantos
|
---|
27 | */
|
---|
28 | public class SimilarNamedWays extends Test {
|
---|
29 |
|
---|
30 | protected static int SIMILAR_NAMED = 701;
|
---|
31 |
|
---|
32 | /** All ways, grouped by cells */
|
---|
33 | Map<Point2D,List<Way>> cellWays;
|
---|
34 | /** The already detected errors */
|
---|
35 | MultiMap<Way, Way> errorWays;
|
---|
36 |
|
---|
37 | /**
|
---|
38 | * Constructor
|
---|
39 | */
|
---|
40 | public SimilarNamedWays() {
|
---|
41 | super(tr("Similarly named ways")+".",
|
---|
42 | tr("This test checks for ways with similar names that may have been misspelled."));
|
---|
43 | }
|
---|
44 |
|
---|
45 | @Override
|
---|
46 | public void startTest(ProgressMonitor monitor) {
|
---|
47 | super.startTest(monitor);
|
---|
48 | cellWays = new HashMap<Point2D,List<Way>>(1000);
|
---|
49 | errorWays = new MultiMap<Way, Way>();
|
---|
50 | }
|
---|
51 |
|
---|
52 | @Override
|
---|
53 | public void endTest() {
|
---|
54 | cellWays = null;
|
---|
55 | errorWays = null;
|
---|
56 | super.endTest();
|
---|
57 | }
|
---|
58 |
|
---|
59 | @Override
|
---|
60 | public void visit(Way w) {
|
---|
61 | if (!w.isUsable())
|
---|
62 | return;
|
---|
63 |
|
---|
64 | String name = w.get("name");
|
---|
65 | if (name == null || name.length() < 6)
|
---|
66 | return;
|
---|
67 |
|
---|
68 | List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
|
---|
69 | for (List<Way> ways : theCellWays) {
|
---|
70 | for (Way w2 : ways) {
|
---|
71 | if (errorWays.contains(w, w2) || errorWays.contains(w2, w)) {
|
---|
72 | continue;
|
---|
73 | }
|
---|
74 |
|
---|
75 | String name2 = w2.get("name");
|
---|
76 | if (name2 == null || name2.length() < 6) {
|
---|
77 | continue;
|
---|
78 | }
|
---|
79 |
|
---|
80 | int levenshteinDistance = getLevenshteinDistance(name, name2);
|
---|
81 | if (0 < levenshteinDistance && levenshteinDistance <= 2) {
|
---|
82 | List<OsmPrimitive> primitives = new ArrayList<OsmPrimitive>();
|
---|
83 | primitives.add(w);
|
---|
84 | primitives.add(w2);
|
---|
85 | errors.add(new TestError(this, Severity.WARNING, tr("Similarly named ways"), SIMILAR_NAMED, primitives));
|
---|
86 | errorWays.put(w, w2);
|
---|
87 | }
|
---|
88 | }
|
---|
89 | ways.add(w);
|
---|
90 | }
|
---|
91 | }
|
---|
92 |
|
---|
93 | /**
|
---|
94 | * Compute Levenshtein distance
|
---|
95 | *
|
---|
96 | * @param s First word
|
---|
97 | * @param t Second word
|
---|
98 | * @return The distance between words
|
---|
99 | */
|
---|
100 | public int getLevenshteinDistance(String s, String t) {
|
---|
101 | int d[][]; // matrix
|
---|
102 | int n; // length of s
|
---|
103 | int m; // length of t
|
---|
104 | int i; // iterates through s
|
---|
105 | int j; // iterates through t
|
---|
106 | char s_i; // ith character of s
|
---|
107 | char t_j; // jth character of t
|
---|
108 | int cost; // cost
|
---|
109 |
|
---|
110 | // Step 1
|
---|
111 | n = s.length();
|
---|
112 | m = t.length();
|
---|
113 | if (n == 0)
|
---|
114 | return m;
|
---|
115 | if (m == 0)
|
---|
116 | return n;
|
---|
117 | d = new int[n + 1][m + 1];
|
---|
118 |
|
---|
119 | // Step 2
|
---|
120 | for (i = 0; i <= n; i++) {
|
---|
121 | d[i][0] = i;
|
---|
122 | }
|
---|
123 | for (j = 0; j <= m; j++) {
|
---|
124 | d[0][j] = j;
|
---|
125 | }
|
---|
126 |
|
---|
127 | // Step 3
|
---|
128 | for (i = 1; i <= n; i++) {
|
---|
129 |
|
---|
130 | s_i = s.charAt(i - 1);
|
---|
131 |
|
---|
132 | // Step 4
|
---|
133 | for (j = 1; j <= m; j++) {
|
---|
134 |
|
---|
135 | t_j = t.charAt(j - 1);
|
---|
136 |
|
---|
137 | // Step 5
|
---|
138 | if (s_i == t_j) {
|
---|
139 | cost = 0;
|
---|
140 | } else {
|
---|
141 | cost = 1;
|
---|
142 | }
|
---|
143 |
|
---|
144 | // Step 6
|
---|
145 | d[i][j] = Utils.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
|
---|
146 | }
|
---|
147 | }
|
---|
148 |
|
---|
149 | // Step 7
|
---|
150 | return d[n][m];
|
---|
151 | }
|
---|
152 | }
|
---|