Context Navigation

source: josm/trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java@ 11747

Last change on this file since 11747 was 11747, checked in by Don-vip, 7 years ago
checkstyle - NoWhiteSpaceBefore `...`
Property svn:eol-style set to `native`
File size: 9.7 KB

Line
1	// License: GPL. For details, see LICENSE file.
2	package org.openstreetmap.josm.data.validation.tests;
3
4	import static java.util.regex.Pattern.CASE_INSENSITIVE;
5	import static java.util.regex.Pattern.UNICODE_CASE;
6	import static org.openstreetmap.josm.tools.I18n.tr;
7
8	import java.awt.geom.Point2D;
9	import java.util.ArrayList;
10	import java.util.Arrays;
11	import java.util.HashMap;
12	import java.util.List;
13	import java.util.Locale;
14	import java.util.Map;
15	import java.util.regex.Matcher;
16	import java.util.regex.Pattern;
17
18	import org.openstreetmap.josm.data.osm.OsmPrimitive;
19	import org.openstreetmap.josm.data.osm.Way;
20	import org.openstreetmap.josm.data.validation.Severity;
21	import org.openstreetmap.josm.data.validation.Test;
22	import org.openstreetmap.josm.data.validation.TestError;
23	import org.openstreetmap.josm.data.validation.util.ValUtil;
24	import org.openstreetmap.josm.gui.progress.ProgressMonitor;
25	import org.openstreetmap.josm.tools.MultiMap;
26
27	/**
28	* Checks for similar named ways, symptom of a possible typo. It uses the
29	* Levenshtein distance to check for similarity
30	*
31	* @author frsantos
32	*/
33	public class SimilarNamedWays extends Test {
34
35	protected static final int SIMILAR_NAMED = 701;
36
37	/** All ways, grouped by cells */
38	private Map<Point2D, List<Way>> cellWays;
39	/** The already detected errors */
40	private MultiMap<Way, Way> errorWays;
41
42	private final List<NormalizeRule> rules = new ArrayList<>();
43
44	/**
45	* Constructor
46	*/
47	public SimilarNamedWays() {
48	super(tr("Similarly named ways"),
49	tr("This test checks for ways with similar names that may have been misspelled."));
50
51	// FIXME: hardcode these rules for now. Replace them with preferences later
52	// See https://josm.openstreetmap.de/ticket/3733#comment:19
53	addRegExprRule("\\d+", "0"); // Highway 66
54	addRegExprRule("\\d+(st\|nd\|rd\|th)", "0st"); // 3rd Ave
55	addRegExprRule("^[A-Z] ", "X"); // E Street
56	addSynonyms("east", "west", "north", "south");
57	addSynonyms("first", "second", "third");
58	}
59
60	@Override
61	public void startTest(ProgressMonitor monitor) {
62	super.startTest(monitor);
63	cellWays = new HashMap<>(1000);
64	errorWays = new MultiMap<>();
65	}
66
67	@Override
68	public void endTest() {
69	cellWays = null;
70	errorWays = null;
71	super.endTest();
72	}
73
74	@Override
75	public void visit(Way w) {
76	if (!w.isUsable())
77	return;
78
79	String name = w.get("name");
80	if (name == null \|\| name.length() < 6)
81	return;
82
83	List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
84	for (List<Way> ways : theCellWays) {
85	for (Way w2 : ways) {
86	if (errorWays.contains(w, w2) \|\| errorWays.contains(w2, w)) {
87	continue;
88	}
89
90	String name2 = w2.get("name");
91	if (name2 == null \|\| name2.length() < 6) {
92	continue;
93	}
94
95	if (similaryName(name, name2)) {
96	List<OsmPrimitive> primitives = new ArrayList<>(2);
97	primitives.add(w);
98	primitives.add(w2);
99	errors.add(TestError.builder(this, Severity.WARNING, SIMILAR_NAMED)
100	.message(tr("Similarly named ways"))
101	.primitives(primitives)
102	.build());
103	errorWays.put(w, w2);
104	}
105	}
106	ways.add(w);
107	}
108	}
109
110	/**
111	* Compute Levenshtein distance
112	*
113	* @param s First word
114	* @param t Second word
115	* @return The distance between words
116	*/
117	public static int getLevenshteinDistance(String s, String t) {
118	int[][] d; // matrix
119	int n; // length of s
120	int m; // length of t
121	int i; // iterates through s
122	int j; // iterates through t
123	char si; // ith character of s
124	char tj; // jth character of t
125	int cost; // cost
126
127	// Step 1
128	n = s.length();
129	m = t.length();
130	if (n == 0)
131	return m;
132	if (m == 0)
133	return n;
134	d = new int[n+1][m+1];
135
136	// Step 2
137	for (i = 0; i <= n; i++) {
138	d[i][0] = i;
139	}
140	for (j = 0; j <= m; j++) {
141	d[0][j] = j;
142	}
143
144	// Step 3
145	for (i = 1; i <= n; i++) {
146
147	si = s.charAt(i - 1);
148
149	// Step 4
150	for (j = 1; j <= m; j++) {
151
152	tj = t.charAt(j - 1);
153
154	// Step 5
155	if (si == tj) {
156	cost = 0;
157	} else {
158	cost = 1;
159	}
160
161	// Step 6
162	d[i][j] = Math.min(Math.min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);
163	}
164	}
165
166	// Step 7
167	return d[n][m];
168	}
169
170	/**
171	* Add a regular expression rule.
172	* @param regExpr the regular expression to search for
173	* @param replacement a string to replace with, which should match the expression.
174	*/
175	public void addRegExprRule(String regExpr, String replacement) {
176	rules.add(new RegExprRule(regExpr, replacement));
177	}
178
179	/**
180	* Add a rule with synonym words.
181	* @param words words which are synonyms
182	*/
183	public void addSynonyms(String... words) {
184	for (String word : words) {
185	rules.add(new SynonymRule(word, words));
186	}
187	}
188
189	/**
190	* Check if two names are similar, but not identical. First both names will be "normalized".
191	* Afterwards the Levenshtein distance will be calculated.<br>
192	* Examples for normalization rules:<br>
193	* <code>replaceAll("\\d+", "0")</code><br>
194	* would cause similaryName("track 1", "track 2") = false, but similaryName("Track 1", "track 2") = true
195	* @param name first name to compare
196	* @param name2 second name to compare
197	* @return true if the normalized names are different but only a "little bit"
198	*/
199	public boolean similaryName(String name, String name2) {
200	// check plain strings
201	int distance = getLevenshteinDistance(name, name2);
202	boolean similar = distance > 0 && distance <= 2;
203
204	// try all rules
205	for (NormalizeRule rule : rules) {
206	int levenshteinDistance = getLevenshteinDistance(rule.normalize(name), rule.normalize(name2));
207	if (levenshteinDistance == 0)
208	// one rule results in identical names: identical
209	return false;
210	else if (levenshteinDistance <= 2) {
211	// 0 < distance <= 2
212	similar = true;
213	}
214	}
215	return similar;
216	}
217
218	@FunctionalInterface
219	public interface NormalizeRule {
220
221	/**
222	* Normalize the string by replacing parts.
223	* @param name name to normalize
224	* @return normalized string
225	*/
226	String normalize(String name);
227	}
228
229	public static class RegExprRule implements NormalizeRule {
230	private final Pattern regExpr;
231	private final String replacement;
232
233	public RegExprRule(String expression, String replacement) {
234	this.regExpr = Pattern.compile(expression);
235	this.replacement = replacement;
236	}
237
238	@Override
239	public String normalize(String name) {
240	return regExpr.matcher(name).replaceAll(replacement);
241	}
242
243	@Override
244	public String toString() {
245	return "replaceAll(" + regExpr + ", " + replacement + ')';
246	}
247	}
248
249	public static class SynonymRule implements NormalizeRule {
250
251	private final String[] words;
252	private final Pattern regExpr;
253	private final String replacement;
254
255	public SynonymRule(String replacement, String... words) {
256	this.replacement = replacement.toLowerCase(Locale.ENGLISH);
257	this.words = words;
258
259	// build regular expression for other words (for fast match)
260	StringBuilder expression = new StringBuilder();
261	int maxLength = 0;
262	for (int i = 0; i < words.length; i++) {
263	if (words[i].length() > maxLength) {
264	maxLength = words[i].length();
265	}
266	if (expression.length() > 0) {
267	expression.append('\|');
268	}
269	expression.append(Pattern.quote(words[i]));
270	}
271	this.regExpr = Pattern.compile(expression.toString(), CASE_INSENSITIVE + UNICODE_CASE);
272	}
273
274	@Override
275	public String normalize(String name) {
276	// find first match
277	Matcher matcher = regExpr.matcher(name);
278	if (!matcher.find())
279	return name;
280
281	int start = matcher.start();
282
283	// which word matches?
284	String part = "";
285	for (int i = 0; i < words.length; i++) {
286	String word = words[i];
287	part = name.substring(start, start + word.length());
288	if (word.equalsIgnoreCase(part)) {
289	break;
290	}
291	}
292
293	// replace the word
294	char[] newName = matcher.replaceFirst(replacement).toCharArray();
295
296	// adjust case (replacement is not shorter than matching word!)
297	int minLength = Math.min(replacement.length(), part.length());
298	for (int i = 0; i < minLength; i++) {
299	if (Character.isUpperCase(part.charAt(i))) {
300	newName[start + i] = Character.toUpperCase(newName[start + i]);
301	}
302	}
303
304	return new String(newName);
305	}
306
307	@Override
308	public String toString() {
309	return "synonyms(" + replacement + ", " + Arrays.toString(words) + ')';
310	}
311	}
312	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: