Context Navigation

source: josm/trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java@ 9070

Last change on this file since 9070 was 9070, checked in by Don-vip, 8 years ago
Sonar - squid:S2293 - The diamond operator ("<>") should be used
Property svn:eol-style set to `native`
File size: 9.6 KB

Line
1	// License: GPL. For details, see LICENSE file.
2	package org.openstreetmap.josm.data.validation.tests;
3
4	import static java.util.regex.Pattern.CASE_INSENSITIVE;
5	import static java.util.regex.Pattern.UNICODE_CASE;
6	import static org.openstreetmap.josm.tools.I18n.tr;
7
8	import java.awt.geom.Point2D;
9	import java.util.ArrayList;
10	import java.util.Arrays;
11	import java.util.HashMap;
12	import java.util.List;
13	import java.util.Locale;
14	import java.util.Map;
15	import java.util.regex.Matcher;
16	import java.util.regex.Pattern;
17
18	import org.openstreetmap.josm.data.osm.OsmPrimitive;
19	import org.openstreetmap.josm.data.osm.Way;
20	import org.openstreetmap.josm.data.validation.Severity;
21	import org.openstreetmap.josm.data.validation.Test;
22	import org.openstreetmap.josm.data.validation.TestError;
23	import org.openstreetmap.josm.data.validation.util.ValUtil;
24	import org.openstreetmap.josm.gui.progress.ProgressMonitor;
25	import org.openstreetmap.josm.tools.MultiMap;
26	import org.openstreetmap.josm.tools.Utils;
27
28	/**
29	* Checks for similar named ways, symptom of a possible typo. It uses the
30	* Levenshtein distance to check for similarity
31	*
32	* @author frsantos
33	*/
34	public class SimilarNamedWays extends Test {
35
36	protected static final int SIMILAR_NAMED = 701;
37
38	/** All ways, grouped by cells */
39	private Map<Point2D, List<Way>> cellWays;
40	/** The already detected errors */
41	private MultiMap<Way, Way> errorWays;
42
43	private final List<NormalizeRule> rules = new ArrayList<>();
44
45	/**
46	* Constructor
47	*/
48	public SimilarNamedWays() {
49	super(tr("Similarly named ways"),
50	tr("This test checks for ways with similar names that may have been misspelled."));
51
52	// FIXME: hardcode these rules for now. Replace them with preferences later
53	// See https://josm.openstreetmap.de/ticket/3733#comment:19
54	addRegExprRule("\\d+", "0"); // Highway 66
55	addRegExprRule("\\d+(st\|nd\|rd\|th)", "0st"); // 3rd Ave
56	addRegExprRule("^[A-Z] ", "X"); // E Street
57	addSynonyms("east", "west", "north", "south");
58	addSynonyms("first", "second", "third");
59	}
60
61	@Override
62	public void startTest(ProgressMonitor monitor) {
63	super.startTest(monitor);
64	cellWays = new HashMap<>(1000);
65	errorWays = new MultiMap<>();
66	}
67
68	@Override
69	public void endTest() {
70	cellWays = null;
71	errorWays = null;
72	super.endTest();
73	}
74
75	@Override
76	public void visit(Way w) {
77	if (!w.isUsable())
78	return;
79
80	String name = w.get("name");
81	if (name == null \|\| name.length() < 6)
82	return;
83
84	List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
85	for (List<Way> ways : theCellWays) {
86	for (Way w2 : ways) {
87	if (errorWays.contains(w, w2) \|\| errorWays.contains(w2, w)) {
88	continue;
89	}
90
91	String name2 = w2.get("name");
92	if (name2 == null \|\| name2.length() < 6) {
93	continue;
94	}
95
96	if (similaryName(name, name2)) {
97	List<OsmPrimitive> primitives = new ArrayList<>(2);
98	primitives.add(w);
99	primitives.add(w2);
100	errors.add(new TestError(this, Severity.WARNING, tr("Similarly named ways"), SIMILAR_NAMED, primitives));
101	errorWays.put(w, w2);
102	}
103	}
104	ways.add(w);
105	}
106	}
107
108	/**
109	* Compute Levenshtein distance
110	*
111	* @param s First word
112	* @param t Second word
113	* @return The distance between words
114	*/
115	public static int getLevenshteinDistance(String s, String t) {
116	int[][] d; // matrix
117	int n; // length of s
118	int m; // length of t
119	int i; // iterates through s
120	int j; // iterates through t
121	char s_i; // ith character of s
122	char t_j; // jth character of t
123	int cost; // cost
124
125	// Step 1
126	n = s.length();
127	m = t.length();
128	if (n == 0)
129	return m;
130	if (m == 0)
131	return n;
132	d = new int[n+1][m+1];
133
134	// Step 2
135	for (i = 0; i <= n; i++) {
136	d[i][0] = i;
137	}
138	for (j = 0; j <= m; j++) {
139	d[0][j] = j;
140	}
141
142	// Step 3
143	for (i = 1; i <= n; i++) {
144
145	s_i = s.charAt(i - 1);
146
147	// Step 4
148	for (j = 1; j <= m; j++) {
149
150	t_j = t.charAt(j - 1);
151
152	// Step 5
153	if (s_i == t_j) {
154	cost = 0;
155	} else {
156	cost = 1;
157	}
158
159	// Step 6
160	d[i][j] = Utils.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
161	}
162	}
163
164	// Step 7
165	return d[n][m];
166	}
167
168	/**
169	* Add a regular expression rule.
170	* @param regExpr the regular expression to search for
171	* @param replacement a string to replace with, which should match the expression.
172	*/
173	public void addRegExprRule(String regExpr, String replacement) {
174	rules.add(new RegExprRule(regExpr, replacement));
175	}
176
177	/**
178	* Add a rule with synonym words.
179	* @param words words which are synonyms
180	*/
181	public void addSynonyms(String... words) {
182	for (String word : words) {
183	rules.add(new SynonymRule(word, words));
184	}
185	}
186
187	/**
188	* Check if two names are similar, but not identical. First both names will be "normalized".
189	* Afterwards the Levenshtein distance will be calculated.<br>
190	* Examples for normalization rules:<br>
191	* <code>replaceAll("\\d+", "0")</code><br>
192	* would cause similaryName("track 1", "track 2") = false, but similaryName("Track 1", "track 2") = true
193	* @param name first name to compare
194	* @param name2 second name to compare
195	* @return true if the normalized names are different but only a "little bit"
196	*/
197	public boolean similaryName(String name, String name2) {
198	// check plain strings
199	int distance = getLevenshteinDistance(name, name2);
200	boolean similar = distance > 0 && distance <= 2;
201
202	// try all rules
203	for (NormalizeRule rule : rules) {
204	int levenshteinDistance = getLevenshteinDistance(rule.normalize(name), rule.normalize(name2));
205	if (levenshteinDistance == 0)
206	// one rule results in identical names: identical
207	return false;
208	else if (levenshteinDistance <= 2) {
209	// 0 < distance <= 2
210	similar = true;
211	}
212	}
213	return similar;
214	}
215
216	public interface NormalizeRule {
217
218	/**
219	* Normalize the string by replacing parts.
220	* @param name name to normalize
221	* @return normalized string
222	*/
223	String normalize(String name);
224	}
225
226	public static class RegExprRule implements NormalizeRule {
227	private final Pattern regExpr;
228	private final String replacement;
229
230	public RegExprRule(String expression, String replacement) {
231	this.regExpr = Pattern.compile(expression);
232	this.replacement = replacement;
233	}
234
235	@Override
236	public String normalize(String name) {
237	return regExpr.matcher(name).replaceAll(replacement);
238	}
239
240	@Override
241	public String toString() {
242	return "replaceAll(" + regExpr + ", " + replacement + ')';
243	}
244	}
245
246	public static class SynonymRule implements NormalizeRule {
247
248	private final String[] words;
249	private final Pattern regExpr;
250	private final String replacement;
251
252	public SynonymRule(String replacement, String[] words) {
253	this.replacement = replacement.toLowerCase(Locale.ENGLISH);
254	this.words = words;
255
256	// build regular expression for other words (for fast match)
257	StringBuilder expression = new StringBuilder();
258	int maxLength = 0;
259	for (int i = 0; i < words.length; i++) {
260	if (words[i].length() > maxLength) {
261	maxLength = words[i].length();
262	}
263	if (expression.length() > 0) {
264	expression.append('\|');
265	}
266	expression.append(Pattern.quote(words[i]));
267	}
268	this.regExpr = Pattern.compile(expression.toString(), CASE_INSENSITIVE + UNICODE_CASE);
269	}
270
271	@Override
272	public String normalize(String name) {
273	// find first match
274	Matcher matcher = regExpr.matcher(name);
275	if (!matcher.find())
276	return name;
277
278	int start = matcher.start();
279
280	// which word matches?
281	String part = "";
282	for (int i = 0; i < words.length; i++) {
283	String word = words[i];
284	part = name.substring(start, start + word.length());
285	if (word.equalsIgnoreCase(part)) {
286	break;
287	}
288	}
289
290	// replace the word
291	char[] newName = matcher.replaceFirst(replacement).toCharArray();
292
293	// adjust case (replacement is not shorter than matching word!)
294	int minLength = Math.min(replacement.length(), part.length());
295	for (int i = 0; i < minLength; i++) {
296	if (Character.isUpperCase(part.charAt(i))) {
297	newName[start + i] = Character.toUpperCase(newName[start + i]);
298	}
299	}
300
301	return new String(newName);
302	}
303
304	@Override
305	public String toString() {
306	return "synonyms(" + replacement + ", " + Arrays.toString(words) + ')';
307	}
308	}
309	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: