source: josm/trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java

Last change on this file was 17374, checked in by GerdP, 3 years ago

see #20167: [patch] Improve code readability by replacing indexed loops with foreach
Patch by gaben, slightly modified
I removed the changes for

  • GpxImageCorrelation.java, they introduce a TODO
  • ConnectivityRelations.java (no improvement in readability)
  • Property svn:eol-style set to native
File size: 9.2 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.data.validation.tests;
3
4import static java.util.regex.Pattern.CASE_INSENSITIVE;
5import static java.util.regex.Pattern.UNICODE_CASE;
6import static org.openstreetmap.josm.tools.I18n.tr;
7
8import java.awt.geom.Point2D;
9import java.util.ArrayList;
10import java.util.Arrays;
11import java.util.HashMap;
12import java.util.List;
13import java.util.Locale;
14import java.util.Map;
15import java.util.regex.Matcher;
16import java.util.regex.Pattern;
17
18import org.openstreetmap.josm.data.osm.OsmPrimitive;
19import org.openstreetmap.josm.data.osm.Way;
20import org.openstreetmap.josm.data.validation.Severity;
21import org.openstreetmap.josm.data.validation.Test;
22import org.openstreetmap.josm.data.validation.TestError;
23import org.openstreetmap.josm.data.validation.util.ValUtil;
24import org.openstreetmap.josm.gui.progress.ProgressMonitor;
25import org.openstreetmap.josm.tools.MultiMap;
26import org.openstreetmap.josm.tools.Utils;
27
28/**
29 * Checks for similar named ways, symptom of a possible typo. It uses the
30 * Levenshtein distance to check for similarity
31 *
32 * @author frsantos
33 */
34public class SimilarNamedWays extends Test {
35
36 protected static final int SIMILAR_NAMED = 701;
37
38 /** All ways, grouped by cells */
39 private Map<Point2D, List<Way>> cellWays;
40 /** The already detected errors */
41 private MultiMap<Way, Way> errorWays;
42
43 private final List<NormalizeRule> rules = new ArrayList<>();
44
45 /**
46 * Constructor
47 */
48 public SimilarNamedWays() {
49 super(tr("Similarly named ways"),
50 tr("This test checks for ways with similar names that may have been misspelled."));
51
52 // FIXME: hardcode these rules for now. Replace them with preferences later
53 // See https://josm.openstreetmap.de/ticket/3733#comment:19
54 addRegExprRule("\\pN+", "0"); // Unicode numbers: matches "Highway 66" but also persian numbers
55 addRegExprRule("M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "0"); // Roman numbers: matches "Building II"
56 addRegExprRule("\\d+(st|nd|rd|th)", "0st"); // 3rd Ave
57 addRegExprRule("^[A-Z] ", "X"); // E Street
58 addSynonyms("east", "west", "north", "south");
59 addSynonyms("first", "second", "third");
60 }
61
62 @Override
63 public void startTest(ProgressMonitor monitor) {
64 super.startTest(monitor);
65 cellWays = new HashMap<>(1000);
66 errorWays = new MultiMap<>();
67 }
68
69 @Override
70 public void endTest() {
71 cellWays = null;
72 errorWays = null;
73 super.endTest();
74 }
75
76 @Override
77 public void visit(Way w) {
78 if (!w.isUsable())
79 return;
80
81 String name = w.get("name");
82 if (name == null || name.length() < 6)
83 return;
84
85 List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
86 for (List<Way> ways : theCellWays) {
87 for (Way w2 : ways) {
88 if (errorWays.contains(w, w2) || errorWays.contains(w2, w)) {
89 continue;
90 }
91
92 String name2 = w2.get("name");
93 if (name2 == null || name2.length() < 6) {
94 continue;
95 }
96
97 if (similaryName(name, name2)) {
98 List<OsmPrimitive> primitives = new ArrayList<>(2);
99 primitives.add(w);
100 primitives.add(w2);
101 errors.add(TestError.builder(this, Severity.WARNING, SIMILAR_NAMED)
102 .message(tr("Similarly named ways"))
103 .primitives(primitives)
104 .build());
105 errorWays.put(w, w2);
106 }
107 }
108 ways.add(w);
109 }
110 }
111
112 /**
113 * Add a regular expression rule.
114 * @param regExpr the regular expression to search for
115 * @param replacement a string to replace with, which should match the expression.
116 */
117 public void addRegExprRule(String regExpr, String replacement) {
118 rules.add(new RegExprRule(regExpr, replacement));
119 }
120
121 /**
122 * Add a rule with synonym words.
123 * @param words words which are synonyms
124 */
125 public void addSynonyms(String... words) {
126 for (String word : words) {
127 rules.add(new SynonymRule(word, words));
128 }
129 }
130
131 /**
132 * Check if two names are similar, but not identical. First both names will be "normalized".
133 * Afterwards the Levenshtein distance will be calculated.<br>
134 * Examples for normalization rules:<br>
135 * <code>replaceAll("\\d+", "0")</code><br>
136 * would cause similaryName("track 1", "track 2") = false, but similaryName("Track 1", "track 2") = true
137 * @param name first name to compare
138 * @param name2 second name to compare
139 * @return true if the normalized names are different but only a "little bit"
140 */
141 public boolean similaryName(String name, String name2) {
142 boolean similar = Utils.isSimilar(name, name2);
143
144 // try all rules
145 for (NormalizeRule rule : rules) {
146 int levenshteinDistance = Utils.getLevenshteinDistance(rule.normalize(name), rule.normalize(name2));
147 if (levenshteinDistance == 0)
148 // one rule results in identical names: identical
149 return false;
150 else if (levenshteinDistance <= 2) {
151 // 0 < distance <= 2
152 similar = true;
153 }
154 }
155 return similar;
156 }
157
158 /**
159 * A normalization that is applied to names before testing them
160 */
161 @FunctionalInterface
162 public interface NormalizeRule {
163
164 /**
165 * Normalize the string by replacing parts.
166 * @param name name to normalize
167 * @return normalized string
168 */
169 String normalize(String name);
170 }
171
172 /**
173 * A rule to replace by regular expression,
174 * so that all strings matching the regular expression are handled as if they were {@link RegExprRule#replacement}
175 */
176 public static class RegExprRule implements NormalizeRule {
177 private final Pattern regExpr;
178 private final String replacement;
179
180 /**
181 * Create a new rule to replace by regular expression
182 * @param expression The regular expression
183 * @param replacement The replacement
184 */
185 public RegExprRule(String expression, String replacement) {
186 this.regExpr = Pattern.compile(expression);
187 this.replacement = replacement;
188 }
189
190 @Override
191 public String normalize(String name) {
192 return regExpr.matcher(name).replaceAll(replacement);
193 }
194
195 @Override
196 public String toString() {
197 return "replaceAll(" + regExpr + ", " + replacement + ')';
198 }
199 }
200
201 /**
202 * A rule that registers synonyms to a given word
203 */
204 public static class SynonymRule implements NormalizeRule {
205
206 private final String[] words;
207 private final Pattern regExpr;
208 private final String replacement;
209
210 /**
211 * Create a new {@link SynonymRule}
212 * @param replacement The word to use instead
213 * @param words The synonyms for that word
214 */
215 public SynonymRule(String replacement, String... words) {
216 this.replacement = replacement.toLowerCase(Locale.ENGLISH);
217 this.words = words;
218
219 // build regular expression for other words (for fast match)
220 StringBuilder expression = new StringBuilder();
221 int maxLength = 0;
222 for (String word : words) {
223 if (word.length() > maxLength) {
224 maxLength = word.length();
225 }
226 if (expression.length() > 0) {
227 expression.append('|');
228 }
229 expression.append(Pattern.quote(word));
230 }
231 this.regExpr = Pattern.compile(expression.toString(), CASE_INSENSITIVE + UNICODE_CASE);
232 }
233
234 @Override
235 public String normalize(String name) {
236 // find first match
237 Matcher matcher = regExpr.matcher(name);
238 if (!matcher.find())
239 return name;
240
241 int start = matcher.start();
242
243 // which word matches?
244 String part = "";
245 for (String word : words) {
246 if (start + word.length() <= name.length()) {
247 part = name.substring(start, start + word.length());
248 }
249 if (word.equalsIgnoreCase(part)) {
250 break;
251 }
252 }
253
254 // replace the word
255 char[] newName = matcher.replaceFirst(replacement).toCharArray();
256
257 // adjust case (replacement is not shorter than matching word!)
258 int minLength = Math.min(replacement.length(), part.length());
259 for (int i = 0; i < minLength; i++) {
260 if (Character.isUpperCase(part.charAt(i))) {
261 newName[start + i] = Character.toUpperCase(newName[start + i]);
262 }
263 }
264
265 return new String(newName);
266 }
267
268 @Override
269 public String toString() {
270 return "synonyms(" + replacement + ", " + Arrays.toString(words) + ')';
271 }
272 }
273}
Note: See TracBrowser for help on using the repository browser.