Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

SimilarNamedWays.java

Last change on this file was 17374, checked in by GerdP, 5 years ago

see #20167: [patch] Improve code readability by replacing indexed loops with foreach
Patch by gaben, slightly modified
I removed the changes for

GpxImageCorrelation.java, they introduce a TODO
ConnectivityRelations.java (no improvement in readability)

Property svn:eol-style set to native

File size: 9.2 KB

Line
1	// License: GPL. For details, see LICENSE file.
2	package org.openstreetmap.josm.data.validation.tests;
3
4	import static java.util.regex.Pattern.CASE_INSENSITIVE;
5	import static java.util.regex.Pattern.UNICODE_CASE;
6	import static org.openstreetmap.josm.tools.I18n.tr;
7
8	import java.awt.geom.Point2D;
9	import java.util.ArrayList;
10	import java.util.Arrays;
11	import java.util.HashMap;
12	import java.util.List;
13	import java.util.Locale;
14	import java.util.Map;
15	import java.util.regex.Matcher;
16	import java.util.regex.Pattern;
17
18	import org.openstreetmap.josm.data.osm.OsmPrimitive;
19	import org.openstreetmap.josm.data.osm.Way;
20	import org.openstreetmap.josm.data.validation.Severity;
21	import org.openstreetmap.josm.data.validation.Test;
22	import org.openstreetmap.josm.data.validation.TestError;
23	import org.openstreetmap.josm.data.validation.util.ValUtil;
24	import org.openstreetmap.josm.gui.progress.ProgressMonitor;
25	import org.openstreetmap.josm.tools.MultiMap;
26	import org.openstreetmap.josm.tools.Utils;
27
28	/**
29	* Checks for similar named ways, symptom of a possible typo. It uses the
30	* Levenshtein distance to check for similarity
31	*
32	* @author frsantos
33	*/
34	public class SimilarNamedWays extends Test {
35
36	protected static final int SIMILAR_NAMED = 701;
37
38	/** All ways, grouped by cells */
39	private Map<Point2D, List<Way>> cellWays;
40	/** The already detected errors */
41	private MultiMap<Way, Way> errorWays;
42
43	private final List<NormalizeRule> rules = new ArrayList<>();
44
45	/**
46	* Constructor
47	*/
48	public SimilarNamedWays() {
49	super(tr("Similarly named ways"),
50	tr("This test checks for ways with similar names that may have been misspelled."));
51
52	// FIXME: hardcode these rules for now. Replace them with preferences later
53	// See https://josm.openstreetmap.de/ticket/3733#comment:19
54	addRegExprRule("\\pN+", "0"); // Unicode numbers: matches "Highway 66" but also persian numbers
55	addRegExprRule("M{0,4}(CM\|CD\|D?C{0,3})(XC\|XL\|L?X{0,3})(IX\|IV\|V?I{0,3})$", "0"); // Roman numbers: matches "Building II"
56	addRegExprRule("\\d+(st\|nd\|rd\|th)", "0st"); // 3rd Ave
57	addRegExprRule("^[A-Z] ", "X"); // E Street
58	addSynonyms("east", "west", "north", "south");
59	addSynonyms("first", "second", "third");
60	}
61
62	@Override
63	public void startTest(ProgressMonitor monitor) {
64	super.startTest(monitor);
65	cellWays = new HashMap<>(1000);
66	errorWays = new MultiMap<>();
67	}
68
69	@Override
70	public void endTest() {
71	cellWays = null;
72	errorWays = null;
73	super.endTest();
74	}
75
76	@Override
77	public void visit(Way w) {
78	if (!w.isUsable())
79	return;
80
81	String name = w.get("name");
82	if (name == null \|\| name.length() < 6)
83	return;
84
85	List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
86	for (List<Way> ways : theCellWays) {
87	for (Way w2 : ways) {
88	if (errorWays.contains(w, w2) \|\| errorWays.contains(w2, w)) {
89	continue;
90	}
91
92	String name2 = w2.get("name");
93	if (name2 == null \|\| name2.length() < 6) {
94	continue;
95	}
96
97	if (similaryName(name, name2)) {
98	List<OsmPrimitive> primitives = new ArrayList<>(2);
99	primitives.add(w);
100	primitives.add(w2);
101	errors.add(TestError.builder(this, Severity.WARNING, SIMILAR_NAMED)
102	.message(tr("Similarly named ways"))
103	.primitives(primitives)
104	.build());
105	errorWays.put(w, w2);
106	}
107	}
108	ways.add(w);
109	}
110	}
111
112	/**
113	* Add a regular expression rule.
114	* @param regExpr the regular expression to search for
115	* @param replacement a string to replace with, which should match the expression.
116	*/
117	public void addRegExprRule(String regExpr, String replacement) {
118	rules.add(new RegExprRule(regExpr, replacement));
119	}
120
121	/**
122	* Add a rule with synonym words.
123	* @param words words which are synonyms
124	*/
125	public void addSynonyms(String... words) {
126	for (String word : words) {
127	rules.add(new SynonymRule(word, words));
128	}
129	}
130
131	/**
132	* Check if two names are similar, but not identical. First both names will be "normalized".
133	* Afterwards the Levenshtein distance will be calculated.<br>
134	* Examples for normalization rules:<br>
135	* <code>replaceAll("\\d+", "0")</code><br>
136	* would cause similaryName("track 1", "track 2") = false, but similaryName("Track 1", "track 2") = true
137	* @param name first name to compare
138	* @param name2 second name to compare
139	* @return true if the normalized names are different but only a "little bit"
140	*/
141	public boolean similaryName(String name, String name2) {
142	boolean similar = Utils.isSimilar(name, name2);
143
144	// try all rules
145	for (NormalizeRule rule : rules) {
146	int levenshteinDistance = Utils.getLevenshteinDistance(rule.normalize(name), rule.normalize(name2));
147	if (levenshteinDistance == 0)
148	// one rule results in identical names: identical
149	return false;
150	else if (levenshteinDistance <= 2) {
151	// 0 < distance <= 2
152	similar = true;
153	}
154	}
155	return similar;
156	}
157
158	/**
159	* A normalization that is applied to names before testing them
160	*/
161	@FunctionalInterface
162	public interface NormalizeRule {
163
164	/**
165	* Normalize the string by replacing parts.
166	* @param name name to normalize
167	* @return normalized string
168	*/
169	String normalize(String name);
170	}
171
172	/**
173	* A rule to replace by regular expression,
174	* so that all strings matching the regular expression are handled as if they were {@link RegExprRule#replacement}
175	*/
176	public static class RegExprRule implements NormalizeRule {
177	private final Pattern regExpr;
178	private final String replacement;
179
180	/**
181	* Create a new rule to replace by regular expression
182	* @param expression The regular expression
183	* @param replacement The replacement
184	*/
185	public RegExprRule(String expression, String replacement) {
186	this.regExpr = Pattern.compile(expression);
187	this.replacement = replacement;
188	}
189
190	@Override
191	public String normalize(String name) {
192	return regExpr.matcher(name).replaceAll(replacement);
193	}
194
195	@Override
196	public String toString() {
197	return "replaceAll(" + regExpr + ", " + replacement + ')';
198	}
199	}
200
201	/**
202	* A rule that registers synonyms to a given word
203	*/
204	public static class SynonymRule implements NormalizeRule {
205
206	private final String[] words;
207	private final Pattern regExpr;
208	private final String replacement;
209
210	/**
211	* Create a new {@link SynonymRule}
212	* @param replacement The word to use instead
213	* @param words The synonyms for that word
214	*/
215	public SynonymRule(String replacement, String... words) {
216	this.replacement = replacement.toLowerCase(Locale.ENGLISH);
217	this.words = words;
218
219	// build regular expression for other words (for fast match)
220	StringBuilder expression = new StringBuilder();
221	int maxLength = 0;
222	for (String word : words) {
223	if (word.length() > maxLength) {
224	maxLength = word.length();
225	}
226	if (expression.length() > 0) {
227	expression.append('\|');
228	}
229	expression.append(Pattern.quote(word));
230	}
231	this.regExpr = Pattern.compile(expression.toString(), CASE_INSENSITIVE + UNICODE_CASE);
232	}
233
234	@Override
235	public String normalize(String name) {
236	// find first match
237	Matcher matcher = regExpr.matcher(name);
238	if (!matcher.find())
239	return name;
240
241	int start = matcher.start();
242
243	// which word matches?
244	String part = "";
245	for (String word : words) {
246	if (start + word.length() <= name.length()) {
247	part = name.substring(start, start + word.length());
248	}
249	if (word.equalsIgnoreCase(part)) {
250	break;
251	}
252	}
253
254	// replace the word
255	char[] newName = matcher.replaceFirst(replacement).toCharArray();
256
257	// adjust case (replacement is not shorter than matching word!)
258	int minLength = Math.min(replacement.length(), part.length());
259	for (int i = 0; i < minLength; i++) {
260	if (Character.isUpperCase(part.charAt(i))) {
261	newName[start + i] = Character.toUpperCase(newName[start + i]);
262	}
263	}
264
265	return new String(newName);
266	}
267
268	@Override
269	public String toString() {
270	return "synonyms(" + replacement + ", " + Arrays.toString(words) + ')';
271	}
272	}
273	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: josm/trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java

Download in other formats: