Context Navigation

source: josm/trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java@ 14501

Last change on this file since 14501 was 14373, checked in by simon04, 5 years ago
see #15889 - deprecate SimilarNamedWays.getLevenshteinDistance
Property svn:eol-style set to `native`
File size: 9.5 KB

Line
1	// License: GPL. For details, see LICENSE file.
2	package org.openstreetmap.josm.data.validation.tests;
3
4	import static java.util.regex.Pattern.CASE_INSENSITIVE;
5	import static java.util.regex.Pattern.UNICODE_CASE;
6	import static org.openstreetmap.josm.tools.I18n.tr;
7
8	import java.awt.geom.Point2D;
9	import java.util.ArrayList;
10	import java.util.Arrays;
11	import java.util.HashMap;
12	import java.util.List;
13	import java.util.Locale;
14	import java.util.Map;
15	import java.util.regex.Matcher;
16	import java.util.regex.Pattern;
17
18	import org.openstreetmap.josm.data.osm.OsmPrimitive;
19	import org.openstreetmap.josm.data.osm.Way;
20	import org.openstreetmap.josm.data.validation.Severity;
21	import org.openstreetmap.josm.data.validation.Test;
22	import org.openstreetmap.josm.data.validation.TestError;
23	import org.openstreetmap.josm.data.validation.util.ValUtil;
24	import org.openstreetmap.josm.gui.progress.ProgressMonitor;
25	import org.openstreetmap.josm.tools.MultiMap;
26	import org.openstreetmap.josm.tools.Utils;
27
28	/**
29	* Checks for similar named ways, symptom of a possible typo. It uses the
30	* Levenshtein distance to check for similarity
31	*
32	* @author frsantos
33	*/
34	public class SimilarNamedWays extends Test {
35
36	protected static final int SIMILAR_NAMED = 701;
37
38	/** All ways, grouped by cells */
39	private Map<Point2D, List<Way>> cellWays;
40	/** The already detected errors */
41	private MultiMap<Way, Way> errorWays;
42
43	private final List<NormalizeRule> rules = new ArrayList<>();
44
45	/**
46	* Constructor
47	*/
48	public SimilarNamedWays() {
49	super(tr("Similarly named ways"),
50	tr("This test checks for ways with similar names that may have been misspelled."));
51
52	// FIXME: hardcode these rules for now. Replace them with preferences later
53	// See https://josm.openstreetmap.de/ticket/3733#comment:19
54	addRegExprRule("\\pN+", "0"); // Unicode numbers: matches "Highway 66" but also persian numbers
55	addRegExprRule("M{0,4}(CM\|CD\|D?C{0,3})(XC\|XL\|L?X{0,3})(IX\|IV\|V?I{0,3})$", "0"); // Roman numbers: matches "Building II"
56	addRegExprRule("\\d+(st\|nd\|rd\|th)", "0st"); // 3rd Ave
57	addRegExprRule("^[A-Z] ", "X"); // E Street
58	addSynonyms("east", "west", "north", "south");
59	addSynonyms("first", "second", "third");
60	}
61
62	@Override
63	public void startTest(ProgressMonitor monitor) {
64	super.startTest(monitor);
65	cellWays = new HashMap<>(1000);
66	errorWays = new MultiMap<>();
67	}
68
69	@Override
70	public void endTest() {
71	cellWays = null;
72	errorWays = null;
73	super.endTest();
74	}
75
76	@Override
77	public void visit(Way w) {
78	if (!w.isUsable())
79	return;
80
81	String name = w.get("name");
82	if (name == null \|\| name.length() < 6)
83	return;
84
85	List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
86	for (List<Way> ways : theCellWays) {
87	for (Way w2 : ways) {
88	if (errorWays.contains(w, w2) \|\| errorWays.contains(w2, w)) {
89	continue;
90	}
91
92	String name2 = w2.get("name");
93	if (name2 == null \|\| name2.length() < 6) {
94	continue;
95	}
96
97	if (similaryName(name, name2)) {
98	List<OsmPrimitive> primitives = new ArrayList<>(2);
99	primitives.add(w);
100	primitives.add(w2);
101	errors.add(TestError.builder(this, Severity.WARNING, SIMILAR_NAMED)
102	.message(tr("Similarly named ways"))
103	.primitives(primitives)
104	.build());
105	errorWays.put(w, w2);
106	}
107	}
108	ways.add(w);
109	}
110	}
111
112	/**
113	* Compute Levenshtein distance
114	*
115	* @param s First word
116	* @param t Second word
117	* @return The distance between words
118	* @deprecated Use {@link Utils#getLevenshteinDistance} instead
119	*/
120	@Deprecated
121	public static int getLevenshteinDistance(String s, String t) {
122	return Utils.getLevenshteinDistance(s, t);
123	}
124
125	/**
126	* Add a regular expression rule.
127	* @param regExpr the regular expression to search for
128	* @param replacement a string to replace with, which should match the expression.
129	*/
130	public void addRegExprRule(String regExpr, String replacement) {
131	rules.add(new RegExprRule(regExpr, replacement));
132	}
133
134	/**
135	* Add a rule with synonym words.
136	* @param words words which are synonyms
137	*/
138	public void addSynonyms(String... words) {
139	for (String word : words) {
140	rules.add(new SynonymRule(word, words));
141	}
142	}
143
144	/**
145	* Check if two names are similar, but not identical. First both names will be "normalized".
146	* Afterwards the Levenshtein distance will be calculated.<br>
147	* Examples for normalization rules:<br>
148	* <code>replaceAll("\\d+", "0")</code><br>
149	* would cause similaryName("track 1", "track 2") = false, but similaryName("Track 1", "track 2") = true
150	* @param name first name to compare
151	* @param name2 second name to compare
152	* @return true if the normalized names are different but only a "little bit"
153	*/
154	public boolean similaryName(String name, String name2) {
155	boolean similar = Utils.isSimilar(name, name2);
156
157	// try all rules
158	for (NormalizeRule rule : rules) {
159	int levenshteinDistance = Utils.getLevenshteinDistance(rule.normalize(name), rule.normalize(name2));
160	if (levenshteinDistance == 0)
161	// one rule results in identical names: identical
162	return false;
163	else if (levenshteinDistance <= 2) {
164	// 0 < distance <= 2
165	similar = true;
166	}
167	}
168	return similar;
169	}
170
171	/**
172	* A normalization that is applied to names before testing them
173	*/
174	@FunctionalInterface
175	public interface NormalizeRule {
176
177	/**
178	* Normalize the string by replacing parts.
179	* @param name name to normalize
180	* @return normalized string
181	*/
182	String normalize(String name);
183	}
184
185	/**
186	* A rule to replace by regular expression,
187	* so that all strings matching the regular expression are handled as if they were {@link RegExprRule#replacement}
188	*/
189	public static class RegExprRule implements NormalizeRule {
190	private final Pattern regExpr;
191	private final String replacement;
192
193	/**
194	* Create a new rule to replace by regular expression
195	* @param expression The regular expression
196	* @param replacement The replacement
197	*/
198	public RegExprRule(String expression, String replacement) {
199	this.regExpr = Pattern.compile(expression);
200	this.replacement = replacement;
201	}
202
203	@Override
204	public String normalize(String name) {
205	return regExpr.matcher(name).replaceAll(replacement);
206	}
207
208	@Override
209	public String toString() {
210	return "replaceAll(" + regExpr + ", " + replacement + ')';
211	}
212	}
213
214	/**
215	* A rule that registers synonyms to a given word
216	*/
217	public static class SynonymRule implements NormalizeRule {
218
219	private final String[] words;
220	private final Pattern regExpr;
221	private final String replacement;
222
223	/**
224	* Create a new {@link SynonymRule}
225	* @param replacement The word to use instead
226	* @param words The synonyms for that word
227	*/
228	public SynonymRule(String replacement, String... words) {
229	this.replacement = replacement.toLowerCase(Locale.ENGLISH);
230	this.words = words;
231
232	// build regular expression for other words (for fast match)
233	StringBuilder expression = new StringBuilder();
234	int maxLength = 0;
235	for (int i = 0; i < words.length; i++) {
236	if (words[i].length() > maxLength) {
237	maxLength = words[i].length();
238	}
239	if (expression.length() > 0) {
240	expression.append('\|');
241	}
242	expression.append(Pattern.quote(words[i]));
243	}
244	this.regExpr = Pattern.compile(expression.toString(), CASE_INSENSITIVE + UNICODE_CASE);
245	}
246
247	@Override
248	public String normalize(String name) {
249	// find first match
250	Matcher matcher = regExpr.matcher(name);
251	if (!matcher.find())
252	return name;
253
254	int start = matcher.start();
255
256	// which word matches?
257	String part = "";
258	for (int i = 0; i < words.length; i++) {
259	String word = words[i];
260	part = name.substring(start, start + word.length());
261	if (word.equalsIgnoreCase(part)) {
262	break;
263	}
264	}
265
266	// replace the word
267	char[] newName = matcher.replaceFirst(replacement).toCharArray();
268
269	// adjust case (replacement is not shorter than matching word!)
270	int minLength = Math.min(replacement.length(), part.length());
271	for (int i = 0; i < minLength; i++) {
272	if (Character.isUpperCase(part.charAt(i))) {
273	newName[start + i] = Character.toUpperCase(newName[start + i]);
274	}
275	}
276
277	return new String(newName);
278	}
279
280	@Override
281	public String toString() {
282	return "synonyms(" + replacement + ", " + Arrays.toString(words) + ')';
283	}
284	}
285	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: