source: josm/trunk/src/org/openstreetmap/josm/data/validation/tests/TagChecker.java@ 15640

Last change on this file since 15640 was 15640, checked in by Don-vip, 4 years ago

fix #18455 - detect objects not matching their presets object type (info level)

  • Property svn:eol-style set to native
File size: 46.4 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.data.validation.tests;
3
4import static org.openstreetmap.josm.tools.I18n.marktr;
5import static org.openstreetmap.josm.tools.I18n.tr;
6
7import java.awt.GridBagConstraints;
8import java.awt.event.ActionListener;
9import java.io.BufferedReader;
10import java.io.IOException;
11import java.lang.Character.UnicodeBlock;
12import java.util.ArrayList;
13import java.util.Arrays;
14import java.util.Collection;
15import java.util.Collections;
16import java.util.EnumSet;
17import java.util.HashMap;
18import java.util.HashSet;
19import java.util.List;
20import java.util.Locale;
21import java.util.Map;
22import java.util.Map.Entry;
23import java.util.Set;
24import java.util.regex.Pattern;
25
26import javax.swing.JCheckBox;
27import javax.swing.JLabel;
28import javax.swing.JPanel;
29
30import org.openstreetmap.josm.command.ChangePropertyCommand;
31import org.openstreetmap.josm.command.ChangePropertyKeyCommand;
32import org.openstreetmap.josm.command.Command;
33import org.openstreetmap.josm.command.SequenceCommand;
34import org.openstreetmap.josm.data.osm.AbstractPrimitive;
35import org.openstreetmap.josm.data.osm.OsmPrimitive;
36import org.openstreetmap.josm.data.osm.Tag;
37import org.openstreetmap.josm.data.osm.Tagged;
38import org.openstreetmap.josm.data.preferences.sources.ValidatorPrefHelper;
39import org.openstreetmap.josm.data.validation.Severity;
40import org.openstreetmap.josm.data.validation.Test.TagTest;
41import org.openstreetmap.josm.data.validation.TestError;
42import org.openstreetmap.josm.data.validation.util.Entities;
43import org.openstreetmap.josm.gui.progress.ProgressMonitor;
44import org.openstreetmap.josm.gui.tagging.presets.TaggingPreset;
45import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetItem;
46import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetListener;
47import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetType;
48import org.openstreetmap.josm.gui.tagging.presets.TaggingPresets;
49import org.openstreetmap.josm.gui.tagging.presets.items.Check;
50import org.openstreetmap.josm.gui.tagging.presets.items.CheckGroup;
51import org.openstreetmap.josm.gui.tagging.presets.items.KeyedItem;
52import org.openstreetmap.josm.gui.widgets.EditableList;
53import org.openstreetmap.josm.io.CachedFile;
54import org.openstreetmap.josm.spi.preferences.Config;
55import org.openstreetmap.josm.tools.GBC;
56import org.openstreetmap.josm.tools.Logging;
57import org.openstreetmap.josm.tools.MultiMap;
58import org.openstreetmap.josm.tools.Utils;
59
60/**
61 * Check for misspelled or wrong tags
62 *
63 * @author frsantos
64 * @since 3669
65 */
66public class TagChecker extends TagTest implements TaggingPresetListener {
67
68 /** The config file of ignored tags */
69 public static final String IGNORE_FILE = "resource://data/validator/ignoretags.cfg";
70 /** The config file of dictionary words */
71 public static final String SPELL_FILE = "resource://data/validator/words.cfg";
72
73 /** Normalized keys: the key should be substituted by the value if the key was not found in presets */
74 private static final Map<String, String> harmonizedKeys = new HashMap<>();
75 /** The spell check preset values which are not stored in TaggingPresets */
76 private static volatile HashSet<String> additionalPresetsValueData;
77 /** often used tags which are not in presets */
78 private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>();
79
80 private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(
81 "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]");
82
83 /** The TagChecker data */
84 private static final List<String> ignoreDataStartsWith = new ArrayList<>();
85 private static final Set<String> ignoreDataEquals = new HashSet<>();
86 private static final List<String> ignoreDataEndsWith = new ArrayList<>();
87 private static final List<Tag> ignoreDataTag = new ArrayList<>();
88 /** tag keys that have only numerical values in the presets */
89 private static final Set<String> ignoreForLevenshtein = new HashSet<>();
90
91 /** The preferences prefix */
92 protected static final String PREFIX = ValidatorPrefHelper.PREFIX + "." + TagChecker.class.getSimpleName();
93
94 /**
95 * The preference key to check values
96 */
97 public static final String PREF_CHECK_VALUES = PREFIX + ".checkValues";
98 /**
99 * The preference key to check keys
100 */
101 public static final String PREF_CHECK_KEYS = PREFIX + ".checkKeys";
102 /**
103 * The preference key to enable complex checks
104 */
105 public static final String PREF_CHECK_COMPLEX = PREFIX + ".checkComplex";
106 /**
107 * The preference key to search for fixme tags
108 */
109 public static final String PREF_CHECK_FIXMES = PREFIX + ".checkFixmes";
110 /**
111 * The preference key to check presets
112 */
113 public static final String PREF_CHECK_PRESETS_TYPES = PREFIX + ".checkPresetsTypes";
114
115 /**
116 * The preference key for source files
117 * @see #DEFAULT_SOURCES
118 */
119 public static final String PREF_SOURCES = PREFIX + ".source";
120
121 private static final String BEFORE_UPLOAD = "BeforeUpload";
122 /**
123 * The preference key to check keys - used before upload
124 */
125 public static final String PREF_CHECK_KEYS_BEFORE_UPLOAD = PREF_CHECK_KEYS + BEFORE_UPLOAD;
126 /**
127 * The preference key to check values - used before upload
128 */
129 public static final String PREF_CHECK_VALUES_BEFORE_UPLOAD = PREF_CHECK_VALUES + BEFORE_UPLOAD;
130 /**
131 * The preference key to run complex tests - used before upload
132 */
133 public static final String PREF_CHECK_COMPLEX_BEFORE_UPLOAD = PREF_CHECK_COMPLEX + BEFORE_UPLOAD;
134 /**
135 * The preference key to search for fixmes - used before upload
136 */
137 public static final String PREF_CHECK_FIXMES_BEFORE_UPLOAD = PREF_CHECK_FIXMES + BEFORE_UPLOAD;
138 /**
139 * The preference key to search for presets - used before upload
140 */
141 public static final String PREF_CHECK_PRESETS_TYPES_BEFORE_UPLOAD = PREF_CHECK_PRESETS_TYPES + BEFORE_UPLOAD;
142
143 private static final int MAX_LEVENSHTEIN_DISTANCE = 2;
144
145 protected boolean checkKeys;
146 protected boolean checkValues;
147 /** Was used for special configuration file, might be used to disable value spell checker. */
148 protected boolean checkComplex;
149 protected boolean checkFixmes;
150 protected boolean checkPresetsTypes;
151
152 protected JCheckBox prefCheckKeys;
153 protected JCheckBox prefCheckValues;
154 protected JCheckBox prefCheckComplex;
155 protected JCheckBox prefCheckFixmes;
156 protected JCheckBox prefCheckPresetsTypes;
157
158 protected JCheckBox prefCheckKeysBeforeUpload;
159 protected JCheckBox prefCheckValuesBeforeUpload;
160 protected JCheckBox prefCheckComplexBeforeUpload;
161 protected JCheckBox prefCheckFixmesBeforeUpload;
162 protected JCheckBox prefCheckPresetsTypesBeforeUpload;
163
164 // CHECKSTYLE.OFF: SingleSpaceSeparator
165 protected static final int EMPTY_VALUES = 1200;
166 protected static final int INVALID_KEY = 1201;
167 protected static final int INVALID_VALUE = 1202;
168 protected static final int FIXME = 1203;
169 protected static final int INVALID_SPACE = 1204;
170 protected static final int INVALID_KEY_SPACE = 1205;
171 protected static final int INVALID_HTML = 1206; /* 1207 was PAINT */
172 protected static final int LONG_VALUE = 1208;
173 protected static final int LONG_KEY = 1209;
174 protected static final int LOW_CHAR_VALUE = 1210;
175 protected static final int LOW_CHAR_KEY = 1211;
176 protected static final int MISSPELLED_VALUE = 1212;
177 protected static final int MISSPELLED_KEY = 1213;
178 protected static final int MULTIPLE_SPACES = 1214;
179 protected static final int MISSPELLED_VALUE_NO_FIX = 1215;
180 protected static final int UNUSUAL_UNICODE_CHAR_VALUE = 1216;
181 protected static final int INVALID_PRESETS_TYPE = 1217;
182 // CHECKSTYLE.ON: SingleSpaceSeparator
183
184 protected EditableList sourcesList;
185
186 private static final List<String> DEFAULT_SOURCES = Arrays.asList(IGNORE_FILE, SPELL_FILE);
187
/**
 * Creates the tag checker test with its translated name and description.
 */
public TagChecker() {
    super(
            tr("Tag checker"),
            tr("This test checks for errors in tag keys and values."));
}
194
195 @Override
196 public void initialize() throws IOException {
197 TaggingPresets.addListener(this);
198 initializeData();
199 initializePresets();
200 analysePresets();
201 }
202
203 /**
204 * Add presets that contain only numerical values to the ignore list
205 */
206 private static void analysePresets() {
207 for (String key : TaggingPresets.getPresetKeys()) {
208 if (isKeyIgnored(key))
209 continue;
210 boolean allNumerical = true;
211 Set<String> values = TaggingPresets.getPresetValues(key);
212 if (values.isEmpty())
213 allNumerical = false;
214 for (String val : values) {
215 if (!isNum(val)) {
216 allNumerical = false;
217 break;
218 }
219 }
220 if (allNumerical) {
221 ignoreForLevenshtein.add(key);
222 }
223 }
224 }
225
226 /**
227 * Reads the spell-check file into a HashMap.
228 * The data file is a list of words, beginning with +/-. If it starts with +,
229 * the word is valid, but if it starts with -, the word should be replaced
230 * by the nearest + word before this.
231 *
232 * @throws IOException if any I/O error occurs
233 */
234 private static void initializeData() throws IOException {
235 ignoreDataStartsWith.clear();
236 ignoreDataEquals.clear();
237 ignoreDataEndsWith.clear();
238 ignoreDataTag.clear();
239 harmonizedKeys.clear();
240 ignoreForLevenshtein.clear();
241 oftenUsedTags.clear();
242
243 StringBuilder errorSources = new StringBuilder();
244 for (String source : Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES)) {
245 try (
246 CachedFile cf = new CachedFile(source);
247 BufferedReader reader = cf.getContentReader()
248 ) {
249 String okValue = null;
250 boolean tagcheckerfile = false;
251 boolean ignorefile = false;
252 boolean isFirstLine = true;
253 String line;
254 while ((line = reader.readLine()) != null) {
255 if (line.isEmpty()) {
256 // ignore
257 } else if (line.startsWith("#")) {
258 if (line.startsWith("# JOSM TagChecker")) {
259 tagcheckerfile = true;
260 Logging.error(tr("Ignoring {0}. Support was dropped", source));
261 } else
262 if (line.startsWith("# JOSM IgnoreTags")) {
263 ignorefile = true;
264 if (!DEFAULT_SOURCES.contains(source)) {
265 Logging.info(tr("Adding {0} to ignore tags", source));
266 }
267 }
268 } else if (ignorefile) {
269 parseIgnoreFileLine(source, line);
270 } else if (tagcheckerfile) {
271 // ignore
272 } else if (line.charAt(0) == '+') {
273 okValue = line.substring(1);
274 } else if (line.charAt(0) == '-' && okValue != null) {
275 String hk = harmonizeKey(line.substring(1));
276 if (!okValue.equals(hk) && harmonizedKeys.put(hk, okValue) != null) {
277 Logging.debug(tr("Line was ignored: {0}", line));
278 }
279 } else {
280 Logging.error(tr("Invalid spellcheck line: {0}", line));
281 }
282 if (isFirstLine) {
283 isFirstLine = false;
284 if (!(tagcheckerfile || ignorefile) && !DEFAULT_SOURCES.contains(source)) {
285 Logging.info(tr("Adding {0} to spellchecker", source));
286 }
287 }
288 }
289 } catch (IOException e) {
290 Logging.error(e);
291 errorSources.append(source).append('\n');
292 }
293 }
294
295 if (errorSources.length() > 0)
296 throw new IOException(tr("Could not access data file(s):\n{0}", errorSources));
297 }
298
299 /**
300 * Parse a line found in a configuration file
301 * @param source name of configuration file
302 * @param line the line to parse
303 */
304 private static void parseIgnoreFileLine(String source, String line) {
305 line = line.trim();
306 if (line.length() < 4) {
307 return;
308 }
309 try {
310 String key = line.substring(0, 2);
311 line = line.substring(2);
312
313 switch (key) {
314 case "S:":
315 ignoreDataStartsWith.add(line);
316 break;
317 case "E:":
318 ignoreDataEquals.add(line);
319 addToKeyDictionary(line);
320 break;
321 case "F:":
322 ignoreDataEndsWith.add(line);
323 break;
324 case "K:":
325 Tag tag = Tag.ofString(line);
326 ignoreDataTag.add(tag);
327 oftenUsedTags.put(tag.getKey(), tag.getValue());
328 addToKeyDictionary(tag.getKey());
329 break;
330 default:
331 if (!key.startsWith(";")) {
332 Logging.warn("Unsupported TagChecker key: " + key);
333 }
334 }
335 } catch (IllegalArgumentException e) {
336 Logging.error("Invalid line in {0} : {1}", source, e.getMessage());
337 Logging.trace(e);
338 }
339 }
340
341 private static void addToKeyDictionary(String key) {
342 if (key != null) {
343 String hk = harmonizeKey(key);
344 if (!key.equals(hk)) {
345 harmonizedKeys.put(hk, key);
346 }
347 }
348 }
349
350 /**
351 * Reads the presets data.
352 *
353 */
354 public static void initializePresets() {
355
356 if (!Config.getPref().getBoolean(PREF_CHECK_VALUES, true))
357 return;
358
359 Collection<TaggingPreset> presets = TaggingPresets.getTaggingPresets();
360 if (!presets.isEmpty()) {
361 initAdditionalPresetsValueData();
362 for (TaggingPreset p : presets) {
363 for (TaggingPresetItem i : p.data) {
364 if (i instanceof KeyedItem) {
365 addPresetValue((KeyedItem) i);
366 } else if (i instanceof CheckGroup) {
367 for (Check c : ((CheckGroup) i).checks) {
368 addPresetValue(c);
369 }
370 }
371 }
372 }
373 }
374 }
375
376 private static void initAdditionalPresetsValueData() {
377 additionalPresetsValueData = new HashSet<>();
378 for (String a : AbstractPrimitive.getUninterestingKeys()) {
379 additionalPresetsValueData.add(a);
380 }
381 for (String a : Config.getPref().getList(ValidatorPrefHelper.PREFIX + ".knownkeys",
382 Arrays.asList("is_in", "int_ref", "fixme", "population"))) {
383 additionalPresetsValueData.add(a);
384 }
385 }
386
387 private static void addPresetValue(KeyedItem ky) {
388 if (ky.key != null && ky.getValues() != null) {
389 addToKeyDictionary(ky.key);
390 }
391 }
392
393 /**
394 * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters)
395 * @param s string to check
396 * @return {@code true} if {@code s} contains non-printing control characters
397 */
398 static boolean containsUnwantedNonPrintingControlCharacter(String s) {
399 return s != null && !s.isEmpty() && (
400 isJoiningChar(s.charAt(0)) ||
401 isJoiningChar(s.charAt(s.length() - 1)) ||
402 s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c))
403 );
404 }
405
406 private static boolean isAsciiControlChar(int c) {
407 return c < 0x20 || c == 0x7F;
408 }
409
410 private static boolean isNewLineChar(int c) {
411 return c == 0x0a || c == 0x0d;
412 }
413
414 private static boolean isJoiningChar(int c) {
415 return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ
416 }
417
418 private static boolean isBidiControlChar(int c) {
419 /* check for range 0x200e to 0x200f (LRM, RLM) or
420 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
421 return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e);
422 }
423
424 static String removeUnwantedNonPrintingControlCharacters(String s) {
425 // Remove all unwanted characters
426 String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
427 // Remove joining characters located at the beginning of the string
428 while (!result.isEmpty() && isJoiningChar(result.charAt(0))) {
429 result = result.substring(1);
430 }
431 // Remove joining characters located at the end of the string
432 while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) {
433 result = result.substring(0, result.length() - 1);
434 }
435 return result;
436 }
437
438 static boolean containsUnusualUnicodeCharacter(String key, String value) {
439 return value != null && value.chars().anyMatch(c -> isUnusualUnicodeBlock(key, c));
440 }
441
442 /**
443 * Detects highly suspicious Unicode characters that have been seen in OSM database.
444 * @param key tag key
445 * @param c current character code point
446 * @return {@code true} if the current unicode block is very unusual for the given key
447 */
448 private static boolean isUnusualUnicodeBlock(String key, int c) {
449 UnicodeBlock b = UnicodeBlock.of(c);
450 return isUnusualPhoneticUse(key, b, c) || isUnusualBmpUse(b) || isUnusualSmpUse(b);
451 }
452
453 private static boolean isAllowedPhoneticCharacter(String key, int c) {
454 return c == 0x0259 // U+0259 is used as a standard character in azerbaidjani
455 || (key.endsWith("ref") && 0x1D2C <= c && c <= 0x1D42); // allow uppercase superscript latin characters in *ref tags
456 }
457
458 private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b, int c) {
459 return !isAllowedPhoneticCharacter(key, c)
460 && (b == UnicodeBlock.IPA_EXTENSIONS // U+0250..U+02AF
461 || b == UnicodeBlock.PHONETIC_EXTENSIONS // U+1D00..U+1D7F
462 || b == UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT) // U+1D80..U+1DBF
463 && !key.endsWith(":pronunciation");
464 }
465
466 private static boolean isUnusualBmpUse(UnicodeBlock b) {
467 // CHECKSTYLE.OFF: BooleanExpressionComplexity
468 return b == UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS // U+20D0..U+20FF
469 || b == UnicodeBlock.MATHEMATICAL_OPERATORS // U+2200..U+22FF
470 || b == UnicodeBlock.ENCLOSED_ALPHANUMERICS // U+2460..U+24FF
471 || b == UnicodeBlock.BOX_DRAWING // U+2500..U+257F
472 || b == UnicodeBlock.GEOMETRIC_SHAPES // U+25A0..U+25FF
473 || b == UnicodeBlock.DINGBATS // U+2700..U+27BF
474 || b == UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS // U+2B00..U+2BFF
475 || b == UnicodeBlock.GLAGOLITIC // U+2C00..U+2C5F
476 || b == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO // U+3130..U+318F
477 || b == UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS // U+3200..U+32FF
478 || b == UnicodeBlock.LATIN_EXTENDED_D // U+A720..U+A7FF
479 || b == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS // U+F900..U+FAFF
480 || b == UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS // U+FB00..U+FB4F
481 || b == UnicodeBlock.VARIATION_SELECTORS // U+FE00..U+FE0F
482 || b == UnicodeBlock.SPECIALS; // U+FFF0..U+FFFF
483 // CHECKSTYLE.ON: BooleanExpressionComplexity
484 }
485
486 private static boolean isUnusualSmpUse(UnicodeBlock b) {
487 // UnicodeBlock.SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS is only defined in Java 9+
488 return b == UnicodeBlock.MUSICAL_SYMBOLS // U+1D100..U+1D1FF
489 || b == UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT // U+1F100..U+1F1FF
490 || b == UnicodeBlock.EMOTICONS // U+1F600..U+1F64F
491 || b == UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS; // U+1F680..U+1F6FF
492 }
493
494 /**
495 * Get set of preset values for the given key.
496 * @param key the key
497 * @return null if key is not in presets or in additionalPresetsValueData,
498 * else a set which might be empty.
499 */
500 private static Set<String> getPresetValues(String key) {
501 Set<String> res = TaggingPresets.getPresetValues(key);
502 if (res != null)
503 return res;
504 if (additionalPresetsValueData.contains(key))
505 return Collections.emptySet();
506 // null means key is not known
507 return null;
508 }
509
510 /**
511 * Determines if the given key is in internal presets.
512 * @param key key
513 * @return {@code true} if the given key is in internal presets
514 * @since 9023
515 */
516 public static boolean isKeyInPresets(String key) {
517 return TaggingPresets.getPresetValues(key) != null;
518 }
519
520 /**
521 * Determines if the given tag is in internal presets.
522 * @param key key
523 * @param value value
524 * @return {@code true} if the given tag is in internal presets
525 * @since 9023
526 */
527 public static boolean isTagInPresets(String key, String value) {
528 final Set<String> values = getPresetValues(key);
529 return values != null && values.contains(value);
530 }
531
532 /**
533 * Returns the list of ignored tags.
534 * @return the list of ignored tags
535 * @since 9023
536 */
537 public static List<Tag> getIgnoredTags() {
538 return new ArrayList<>(ignoreDataTag);
539 }
540
541 /**
542 * Determines if the given tag key is ignored for checks "key/tag not in presets".
543 * @param key key
544 * @return true if the given key is ignored
545 */
546 private static boolean isKeyIgnored(String key) {
547 if (ignoreDataEquals.contains(key)) {
548 return true;
549 }
550 for (String a : ignoreDataStartsWith) {
551 if (key.startsWith(a)) {
552 return true;
553 }
554 }
555 for (String a : ignoreDataEndsWith) {
556 if (key.endsWith(a)) {
557 return true;
558 }
559 }
560 return false;
561 }
562
563 /**
564 * Determines if the given tag is ignored for checks "key/tag not in presets".
565 * @param key key
566 * @param value value
567 * @return {@code true} if the given tag is ignored
568 * @since 9023
569 */
570 public static boolean isTagIgnored(String key, String value) {
571 if (isKeyIgnored(key))
572 return true;
573 final Set<String> values = getPresetValues(key);
574 if (values != null && values.isEmpty())
575 return true;
576 if (!isTagInPresets(key, value)) {
577 for (Tag a : ignoreDataTag) {
578 if (key.equals(a.getKey()) && value.equals(a.getValue())) {
579 return true;
580 }
581 }
582 }
583 return false;
584 }
585
586 /**
587 * Checks the primitive tags
588 * @param p The primitive to check
589 */
590 @Override
591 public void check(OsmPrimitive p) {
592 // Just a collection to know if a primitive has been already marked with error
593 MultiMap<OsmPrimitive, String> withErrors = new MultiMap<>();
594
595 for (Entry<String, String> prop : p.getKeys().entrySet()) {
596 String s = marktr("Tag ''{0}'' invalid.");
597 String key = prop.getKey();
598 String value = prop.getValue();
599
600 if (checkKeys) {
601 checkSingleTagKeySimple(withErrors, p, s, key);
602 }
603 if (checkValues) {
604 checkSingleTagValueSimple(withErrors, p, s, key, value);
605 checkSingleTagComplex(withErrors, p, key, value);
606 }
607 if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) {
608 errors.add(TestError.builder(this, Severity.OTHER, FIXME)
609 .message(tr("FIXMES"))
610 .primitives(p)
611 .build());
612 withErrors.put(p, "FIXME");
613 }
614 }
615
616 if (checkPresetsTypes) {
617 for (TaggingPreset tp : TaggingPresets.getMatchingPresets(null, p.getKeys(), false)) {
618 TaggingPresetType presetType = TaggingPresetType.forPrimitive(p);
619 if (!tp.typeMatches(EnumSet.of(presetType))) {
620 errors.add(TestError.builder(this, Severity.OTHER, INVALID_PRESETS_TYPE)
621 .message(tr("Wrong presets types"),
622 marktr("{0} is not supported by tagging preset: {1}"), tr(presetType.getName()), tp.getLocaleName())
623 .primitives(p)
624 .build());
625 }
626 }
627 }
628 }
629
630 private void checkSingleTagValueSimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key, String value) {
631 if (!checkValues || value == null)
632 return;
633 if ((containsUnwantedNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) {
634 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE)
635 .message(tr("Tag value contains non-printing (usually invisible) character"), s, key)
636 .primitives(p)
637 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value)))
638 .build());
639 withErrors.put(p, "ICV");
640 }
641 if ((containsUnusualUnicodeCharacter(key, value)) && !withErrors.contains(p, "UUCV")) {
642 errors.add(TestError.builder(this, Severity.WARNING, UNUSUAL_UNICODE_CHAR_VALUE)
643 .message(tr("Tag value contains unusual Unicode character"), s, key)
644 .primitives(p)
645 .build());
646 withErrors.put(p, "UUCV");
647 }
648 if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) {
649 errors.add(TestError.builder(this, Severity.ERROR, LONG_VALUE)
650 .message(tr("Tag value longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, value.length()), s, key)
651 .primitives(p)
652 .build());
653 withErrors.put(p, "LV");
654 }
655 if ((value.trim().isEmpty()) && !withErrors.contains(p, "EV")) {
656 errors.add(TestError.builder(this, Severity.WARNING, EMPTY_VALUES)
657 .message(tr("Tags with empty values"), s, key)
658 .primitives(p)
659 .build());
660 withErrors.put(p, "EV");
661 }
662 final String errTypeSpace = "SPACE";
663 if ((value.startsWith(" ") || value.endsWith(" ")) && !withErrors.contains(p, errTypeSpace)) {
664 errors.add(TestError.builder(this, Severity.WARNING, INVALID_SPACE)
665 .message(tr("Property values start or end with white space"), s, key)
666 .primitives(p)
667 .build());
668 withErrors.put(p, errTypeSpace);
669 }
670 if (value.contains(" ") && !withErrors.contains(p, errTypeSpace)) {
671 errors.add(TestError.builder(this, Severity.WARNING, MULTIPLE_SPACES)
672 .message(tr("Property values contain multiple white spaces"), s, key)
673 .primitives(p)
674 .build());
675 withErrors.put(p, errTypeSpace);
676 }
677 if (!value.equals(Entities.unescape(value)) && !withErrors.contains(p, "HTML")) {
678 errors.add(TestError.builder(this, Severity.OTHER, INVALID_HTML)
679 .message(tr("Property values contain HTML entity"), s, key)
680 .primitives(p)
681 .build());
682 withErrors.put(p, "HTML");
683 }
684 }
685
686 private void checkSingleTagKeySimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key) {
687 if (!checkKeys || key == null)
688 return;
689 if ((containsUnwantedNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) {
690 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY)
691 .message(tr("Tag key contains non-printing character"), s, key)
692 .primitives(p)
693 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(key)))
694 .build());
695 withErrors.put(p, "ICK");
696 }
697 if (key.length() > Tagged.MAX_TAG_LENGTH && !withErrors.contains(p, "LK")) {
698 errors.add(TestError.builder(this, Severity.ERROR, LONG_KEY)
699 .message(tr("Tag key longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, key.length()), s, key)
700 .primitives(p)
701 .build());
702 withErrors.put(p, "LK");
703 }
704 if (key.indexOf(' ') >= 0 && !withErrors.contains(p, "IPK")) {
705 errors.add(TestError.builder(this, Severity.WARNING, INVALID_KEY_SPACE)
706 .message(tr("Invalid white space in property key"), s, key)
707 .primitives(p)
708 .build());
709 withErrors.put(p, "IPK");
710 }
711 }
712
713 private void checkSingleTagComplex(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key, String value) {
714 if (!checkValues || key == null || value == null || value.isEmpty())
715 return;
716 if (additionalPresetsValueData != null && !isTagIgnored(key, value)) {
717 if (!isKeyInPresets(key)) {
718 spellCheckKey(withErrors, p, key);
719 } else if (!isTagInPresets(key, value)) {
720 if (oftenUsedTags.contains(key, value)) {
721 // tag is quite often used but not in presets
722 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
723 .message(tr("Presets do not contain property value"),
724 marktr("Value ''{0}'' for key ''{1}'' not in presets, but is known."), value, key)
725 .primitives(p)
726 .build());
727 withErrors.put(p, "UPV");
728 } else {
729 tryGuess(p, key, value, withErrors);
730 }
731 }
732 }
733 }
734
735 private void spellCheckKey(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key) {
736 String prettifiedKey = harmonizeKey(key);
737 String fixedKey;
738 if (ignoreDataEquals.contains(prettifiedKey)) {
739 fixedKey = prettifiedKey;
740 } else {
741 fixedKey = isKeyInPresets(prettifiedKey) ? prettifiedKey : harmonizedKeys.get(prettifiedKey);
742 }
743 if (fixedKey == null) {
744 for (Tag a : ignoreDataTag) {
745 if (a.getKey().equals(prettifiedKey)) {
746 fixedKey = prettifiedKey;
747 break;
748 }
749 }
750 }
751
752 if (fixedKey != null && !"".equals(fixedKey) && !fixedKey.equals(key)) {
753 final String proposedKey = fixedKey;
754 // misspelled preset key
755 final TestError.Builder error = TestError.builder(this, Severity.WARNING, MISSPELLED_KEY)
756 .message(tr("Misspelled property key"), marktr("Key ''{0}'' looks like ''{1}''."), key, proposedKey)
757 .primitives(p);
758 if (p.hasKey(fixedKey)) {
759 errors.add(error.build());
760 } else {
761 errors.add(error.fix(() -> new ChangePropertyKeyCommand(p, key, proposedKey)).build());
762 }
763 withErrors.put(p, "WPK");
764 } else {
765 errors.add(TestError.builder(this, Severity.OTHER, INVALID_KEY)
766 .message(tr("Presets do not contain property key"), marktr("Key ''{0}'' not in presets."), key)
767 .primitives(p)
768 .build());
769 withErrors.put(p, "UPK");
770 }
771 }
772
773 private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) {
774 // try to fix common typos and check again if value is still unknown
775 final String harmonizedValue = harmonizeValue(value);
776 if (harmonizedValue == null || harmonizedValue.isEmpty())
777 return;
778 String fixedValue = null;
779 List<Set<String>> sets = new ArrayList<>();
780 Set<String> presetValues = getPresetValues(key);
781 if (presetValues != null)
782 sets.add(presetValues);
783 Set<String> usedValues = oftenUsedTags.get(key);
784 if (usedValues != null)
785 sets.add(usedValues);
786 for (Set<String> possibleValues: sets) {
787 if (possibleValues.contains(harmonizedValue)) {
788 fixedValue = harmonizedValue;
789 break;
790 }
791 }
792 if (fixedValue == null && !ignoreForLevenshtein.contains(key)) {
793 int maxPresetValueLen = 0;
794 List<String> fixVals = new ArrayList<>();
795 // use Levenshtein distance to find typical typos
796 int minDist = MAX_LEVENSHTEIN_DISTANCE + 1;
797 String closest = null;
798 for (Set<String> possibleValues: sets) {
799 for (String possibleVal : possibleValues) {
800 if (possibleVal.isEmpty())
801 continue;
802 maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length());
803 if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) {
804 // don't suggest fix value when given value is short and lengths are too different
805 // for example surface=u would result in surface=mud
806 continue;
807 }
808 int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue);
809 if (dist >= harmonizedValue.length()) {
810 // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'.
811 continue;
812 }
813 if (dist < minDist) {
814 closest = possibleVal;
815 minDist = dist;
816 fixVals.clear();
817 fixVals.add(possibleVal);
818 } else if (dist == minDist) {
819 fixVals.add(possibleVal);
820 }
821 }
822 }
823
824 if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE
825 && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) {
826 if (fixVals.size() < 2) {
827 fixedValue = closest;
828 } else {
829 Collections.sort(fixVals);
830 // misspelled preset value with multiple good alternatives
831 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX)
832 .message(tr("Unknown property value"),
833 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"),
834 value, key, fixVals)
835 .primitives(p).build());
836 withErrors.put(p, "WPV");
837 return;
838 }
839 }
840 }
841 if (fixedValue != null && !fixedValue.equals(value)) {
842 final String newValue = fixedValue;
843 // misspelled preset value
844 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE)
845 .message(tr("Unknown property value"),
846 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue)
847 .primitives(p)
848 .build());
849 withErrors.put(p, "WPV");
850 } else {
851 // unknown preset value
852 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
853 .message(tr("Presets do not contain property value"),
854 marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key)
855 .primitives(p)
856 .build());
857 withErrors.put(p, "UPV");
858 }
859 }
860
861 private static boolean isNum(String harmonizedValue) {
862 try {
863 Double.parseDouble(harmonizedValue);
864 return true;
865 } catch (NumberFormatException e) {
866 return false;
867 }
868 }
869
870 private static boolean isFixme(String key, String value) {
871 return key.toLowerCase(Locale.ENGLISH).contains("fixme") || key.contains("todo")
872 || value.toLowerCase(Locale.ENGLISH).contains("fixme") || value.contains("check and delete");
873 }
874
875 private static String harmonizeKey(String key) {
876 return Utils.strip(key.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(':', '_').replace(' ', '_'), "-_;:,");
877 }
878
879 private static String harmonizeValue(String value) {
880 return Utils.strip(value.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(' ', '_'), "-_;:,");
881 }
882
883 @Override
884 public void startTest(ProgressMonitor monitor) {
885 super.startTest(monitor);
886 checkKeys = Config.getPref().getBoolean(PREF_CHECK_KEYS, true);
887 if (isBeforeUpload) {
888 checkKeys = checkKeys && Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true);
889 }
890
891 checkValues = Config.getPref().getBoolean(PREF_CHECK_VALUES, true);
892 if (isBeforeUpload) {
893 checkValues = checkValues && Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true);
894 }
895
896 checkComplex = Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true);
897 if (isBeforeUpload) {
898 checkComplex = checkComplex && Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true);
899 }
900
901 checkFixmes = Config.getPref().getBoolean(PREF_CHECK_FIXMES, true);
902 if (isBeforeUpload) {
903 checkFixmes = checkFixmes && Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true);
904 }
905
906 checkPresetsTypes = Config.getPref().getBoolean(PREF_CHECK_PRESETS_TYPES, true);
907 if (isBeforeUpload) {
908 checkPresetsTypes = checkPresetsTypes && Config.getPref().getBoolean(PREF_CHECK_PRESETS_TYPES_BEFORE_UPLOAD, true);
909 }
910 }
911
912 @Override
913 public void visit(Collection<OsmPrimitive> selection) {
914 if (checkKeys || checkValues || checkComplex || checkFixmes || checkPresetsTypes) {
915 super.visit(selection);
916 }
917 }
918
919 @Override
920 public void addGui(JPanel testPanel) {
921 GBC a = GBC.eol();
922 a.anchor = GridBagConstraints.EAST;
923
924 testPanel.add(new JLabel(name+" :"), GBC.eol().insets(3, 0, 0, 0));
925
926 prefCheckKeys = new JCheckBox(tr("Check property keys."), Config.getPref().getBoolean(PREF_CHECK_KEYS, true));
927 prefCheckKeys.setToolTipText(tr("Validate that property keys are valid checking against list of words."));
928 testPanel.add(prefCheckKeys, GBC.std().insets(20, 0, 0, 0));
929
930 prefCheckKeysBeforeUpload = new JCheckBox();
931 prefCheckKeysBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true));
932 testPanel.add(prefCheckKeysBeforeUpload, a);
933
934 prefCheckComplex = new JCheckBox(tr("Use complex property checker."), Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true));
935 prefCheckComplex.setToolTipText(tr("Validate property values and tags using complex rules."));
936 testPanel.add(prefCheckComplex, GBC.std().insets(20, 0, 0, 0));
937
938 prefCheckComplexBeforeUpload = new JCheckBox();
939 prefCheckComplexBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true));
940 testPanel.add(prefCheckComplexBeforeUpload, a);
941
942 final Collection<String> sources = Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES);
943 sourcesList = new EditableList(tr("TagChecker source"));
944 sourcesList.setItems(sources);
945 testPanel.add(new JLabel(tr("Data sources ({0})", "*.cfg")), GBC.eol().insets(23, 0, 0, 0));
946 testPanel.add(sourcesList, GBC.eol().fill(GridBagConstraints.HORIZONTAL).insets(23, 0, 0, 0));
947
948 ActionListener disableCheckActionListener = e -> handlePrefEnable();
949 prefCheckKeys.addActionListener(disableCheckActionListener);
950 prefCheckKeysBeforeUpload.addActionListener(disableCheckActionListener);
951 prefCheckComplex.addActionListener(disableCheckActionListener);
952 prefCheckComplexBeforeUpload.addActionListener(disableCheckActionListener);
953
954 handlePrefEnable();
955
956 prefCheckValues = new JCheckBox(tr("Check property values."), Config.getPref().getBoolean(PREF_CHECK_VALUES, true));
957 prefCheckValues.setToolTipText(tr("Validate that property values are valid checking against presets."));
958 testPanel.add(prefCheckValues, GBC.std().insets(20, 0, 0, 0));
959
960 prefCheckValuesBeforeUpload = new JCheckBox();
961 prefCheckValuesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true));
962 testPanel.add(prefCheckValuesBeforeUpload, a);
963
964 prefCheckFixmes = new JCheckBox(tr("Check for FIXMES."), Config.getPref().getBoolean(PREF_CHECK_FIXMES, true));
965 prefCheckFixmes.setToolTipText(tr("Looks for nodes or ways with FIXME in any property value."));
966 testPanel.add(prefCheckFixmes, GBC.std().insets(20, 0, 0, 0));
967
968 prefCheckFixmesBeforeUpload = new JCheckBox();
969 prefCheckFixmesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true));
970 testPanel.add(prefCheckFixmesBeforeUpload, a);
971
972 prefCheckPresetsTypes = new JCheckBox(tr("Check for presets types."), Config.getPref().getBoolean(PREF_CHECK_PRESETS_TYPES, true));
973 prefCheckPresetsTypes.setToolTipText(tr("Validate that objects types are valid checking against presets."));
974 testPanel.add(prefCheckPresetsTypes, GBC.std().insets(20, 0, 0, 0));
975
976 prefCheckPresetsTypesBeforeUpload = new JCheckBox();
977 prefCheckPresetsTypesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_PRESETS_TYPES_BEFORE_UPLOAD, true));
978 testPanel.add(prefCheckPresetsTypesBeforeUpload, a);
979 }
980
981 /**
982 * Enables/disables the source list field
983 */
984 public void handlePrefEnable() {
985 boolean selected = prefCheckKeys.isSelected() || prefCheckKeysBeforeUpload.isSelected()
986 || prefCheckComplex.isSelected() || prefCheckComplexBeforeUpload.isSelected();
987 sourcesList.setEnabled(selected);
988 }
989
990 @Override
991 public boolean ok() {
992 enabled = prefCheckKeys.isSelected() || prefCheckValues.isSelected() || prefCheckComplex.isSelected() || prefCheckFixmes.isSelected();
993 testBeforeUpload = prefCheckKeysBeforeUpload.isSelected() || prefCheckValuesBeforeUpload.isSelected()
994 || prefCheckFixmesBeforeUpload.isSelected() || prefCheckComplexBeforeUpload.isSelected();
995
996 Config.getPref().putBoolean(PREF_CHECK_VALUES, prefCheckValues.isSelected());
997 Config.getPref().putBoolean(PREF_CHECK_COMPLEX, prefCheckComplex.isSelected());
998 Config.getPref().putBoolean(PREF_CHECK_KEYS, prefCheckKeys.isSelected());
999 Config.getPref().putBoolean(PREF_CHECK_FIXMES, prefCheckFixmes.isSelected());
1000 Config.getPref().putBoolean(PREF_CHECK_PRESETS_TYPES, prefCheckPresetsTypes.isSelected());
1001 Config.getPref().putBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, prefCheckValuesBeforeUpload.isSelected());
1002 Config.getPref().putBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, prefCheckComplexBeforeUpload.isSelected());
1003 Config.getPref().putBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, prefCheckKeysBeforeUpload.isSelected());
1004 Config.getPref().putBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, prefCheckFixmesBeforeUpload.isSelected());
1005 Config.getPref().putBoolean(PREF_CHECK_PRESETS_TYPES_BEFORE_UPLOAD, prefCheckPresetsTypesBeforeUpload.isSelected());
1006 return Config.getPref().putList(PREF_SOURCES, sourcesList.getItems());
1007 }
1008
1009 @Override
1010 public Command fixError(TestError testError) {
1011 List<Command> commands = new ArrayList<>(50);
1012
1013 Collection<? extends OsmPrimitive> primitives = testError.getPrimitives();
1014 for (OsmPrimitive p : primitives) {
1015 Map<String, String> tags = p.getKeys();
1016 if (tags.isEmpty()) {
1017 continue;
1018 }
1019
1020 for (Entry<String, String> prop: tags.entrySet()) {
1021 String key = prop.getKey();
1022 String value = prop.getValue();
1023 if (value == null || value.trim().isEmpty()) {
1024 commands.add(new ChangePropertyCommand(p, key, null));
1025 } else if (value.startsWith(" ") || value.endsWith(" ") || value.contains(" ")) {
1026 commands.add(new ChangePropertyCommand(p, key, Utils.removeWhiteSpaces(value)));
1027 } else if (key.startsWith(" ") || key.endsWith(" ") || key.contains(" ")) {
1028 commands.add(new ChangePropertyKeyCommand(p, key, Utils.removeWhiteSpaces(key)));
1029 } else {
1030 String evalue = Entities.unescape(value);
1031 if (!evalue.equals(value)) {
1032 commands.add(new ChangePropertyCommand(p, key, evalue));
1033 }
1034 }
1035 }
1036 }
1037
1038 if (commands.isEmpty())
1039 return null;
1040 if (commands.size() == 1)
1041 return commands.get(0);
1042
1043 return new SequenceCommand(tr("Fix tags"), commands);
1044 }
1045
1046 @Override
1047 public boolean isFixable(TestError testError) {
1048 if (testError.getTester() instanceof TagChecker) {
1049 int code = testError.getCode();
1050 return code == EMPTY_VALUES || code == INVALID_SPACE ||
1051 code == INVALID_KEY_SPACE || code == INVALID_HTML ||
1052 code == MULTIPLE_SPACES;
1053 }
1054
1055 return false;
1056 }
1057
1058 @Override
1059 public void taggingPresetsModified() {
1060 try {
1061 initializeData();
1062 initializePresets();
1063 analysePresets();
1064 } catch (IOException e) {
1065 Logging.error(e);
1066 }
1067 }
1068}
Note: See TracBrowser for help on using the repository browser.