source: josm/trunk/src/org/openstreetmap/josm/data/validation/tests/TagChecker.java@ 14991

Last change on this file since 14991 was 14991, checked in by Don-vip, 5 years ago

fix #17595 - smarter detection of ZWNJ/ZWJ unicode characters

  • Property svn:eol-style set to native
File size: 43.5 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.data.validation.tests;
3
4import static org.openstreetmap.josm.tools.I18n.marktr;
5import static org.openstreetmap.josm.tools.I18n.tr;
6
7import java.awt.GridBagConstraints;
8import java.awt.event.ActionListener;
9import java.io.BufferedReader;
10import java.io.IOException;
11import java.lang.Character.UnicodeBlock;
12import java.util.ArrayList;
13import java.util.Arrays;
14import java.util.Collection;
15import java.util.Collections;
16import java.util.HashMap;
17import java.util.HashSet;
18import java.util.List;
19import java.util.Locale;
20import java.util.Map;
21import java.util.Map.Entry;
22import java.util.Set;
23import java.util.regex.Pattern;
24
25import javax.swing.JCheckBox;
26import javax.swing.JLabel;
27import javax.swing.JPanel;
28
29import org.openstreetmap.josm.command.ChangePropertyCommand;
30import org.openstreetmap.josm.command.ChangePropertyKeyCommand;
31import org.openstreetmap.josm.command.Command;
32import org.openstreetmap.josm.command.SequenceCommand;
33import org.openstreetmap.josm.data.osm.AbstractPrimitive;
34import org.openstreetmap.josm.data.osm.OsmPrimitive;
35import org.openstreetmap.josm.data.osm.Tag;
36import org.openstreetmap.josm.data.osm.Tagged;
37import org.openstreetmap.josm.data.preferences.sources.ValidatorPrefHelper;
38import org.openstreetmap.josm.data.validation.Severity;
39import org.openstreetmap.josm.data.validation.Test.TagTest;
40import org.openstreetmap.josm.data.validation.TestError;
41import org.openstreetmap.josm.data.validation.util.Entities;
42import org.openstreetmap.josm.gui.progress.ProgressMonitor;
43import org.openstreetmap.josm.gui.tagging.presets.TaggingPreset;
44import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetItem;
45import org.openstreetmap.josm.gui.tagging.presets.TaggingPresets;
46import org.openstreetmap.josm.gui.tagging.presets.items.Check;
47import org.openstreetmap.josm.gui.tagging.presets.items.CheckGroup;
48import org.openstreetmap.josm.gui.tagging.presets.items.KeyedItem;
49import org.openstreetmap.josm.gui.widgets.EditableList;
50import org.openstreetmap.josm.io.CachedFile;
51import org.openstreetmap.josm.spi.preferences.Config;
52import org.openstreetmap.josm.tools.GBC;
53import org.openstreetmap.josm.tools.Logging;
54import org.openstreetmap.josm.tools.MultiMap;
55import org.openstreetmap.josm.tools.Utils;
56
57/**
58 * Check for misspelled or wrong tags
59 *
60 * @author frsantos
61 * @since 3669
62 */
63public class TagChecker extends TagTest {
64
65 /** The config file of ignored tags */
66 public static final String IGNORE_FILE = "resource://data/validator/ignoretags.cfg";
67 /** The config file of dictionary words */
68 public static final String SPELL_FILE = "resource://data/validator/words.cfg";
69
70 /** Normalized keys: the key should be substituted by the value if the key was not found in presets */
71 private static final Map<String, String> harmonizedKeys = new HashMap<>();
72 /** The spell check preset values which are not stored in TaggingPresets */
73 private static volatile HashSet<String> additionalPresetsValueData;
74 /** often used tags which are not in presets */
75 private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>();
76
77 private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(
78 "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]");
79
80 /** The TagChecker data */
81 private static final List<String> ignoreDataStartsWith = new ArrayList<>();
82 private static final Set<String> ignoreDataEquals = new HashSet<>();
83 private static final List<String> ignoreDataEndsWith = new ArrayList<>();
84 private static final List<Tag> ignoreDataTag = new ArrayList<>();
85 /** tag keys that have only numerical values in the presets */
86 private static final Set<String> ignoreForLevenshtein = new HashSet<>();
87
88 /** The preferences prefix */
89 protected static final String PREFIX = ValidatorPrefHelper.PREFIX + "." + TagChecker.class.getSimpleName();
90
91 /**
92 * The preference key to check values
93 */
94 public static final String PREF_CHECK_VALUES = PREFIX + ".checkValues";
95 /**
96 * The preference key to check keys
97 */
98 public static final String PREF_CHECK_KEYS = PREFIX + ".checkKeys";
99 /**
100 * The preference key to enable complex checks
101 */
102 public static final String PREF_CHECK_COMPLEX = PREFIX + ".checkComplex";
103 /**
104 * The preference key to search for fixme tags
105 */
106 public static final String PREF_CHECK_FIXMES = PREFIX + ".checkFixmes";
107
108 /**
109 * The preference key for source files
110 * @see #DEFAULT_SOURCES
111 */
112 public static final String PREF_SOURCES = PREFIX + ".source";
113
114 private static final String BEFORE_UPLOAD = "BeforeUpload";
115 /**
116 * The preference key to check keys - used before upload
117 */
118 public static final String PREF_CHECK_KEYS_BEFORE_UPLOAD = PREF_CHECK_KEYS + BEFORE_UPLOAD;
119 /**
120 * The preference key to check values - used before upload
121 */
122 public static final String PREF_CHECK_VALUES_BEFORE_UPLOAD = PREF_CHECK_VALUES + BEFORE_UPLOAD;
123 /**
124 * The preference key to run complex tests - used before upload
125 */
126 public static final String PREF_CHECK_COMPLEX_BEFORE_UPLOAD = PREF_CHECK_COMPLEX + BEFORE_UPLOAD;
127 /**
128 * The preference key to search for fixmes - used before upload
129 */
130 public static final String PREF_CHECK_FIXMES_BEFORE_UPLOAD = PREF_CHECK_FIXMES + BEFORE_UPLOAD;
131
132 private static final int MAX_LEVENSHTEIN_DISTANCE = 2;
133
134 protected boolean checkKeys;
135 protected boolean checkValues;
136 /** Was used for special configuration file, might be used to disable value spell checker. */
137 protected boolean checkComplex;
138 protected boolean checkFixmes;
139
140 protected JCheckBox prefCheckKeys;
141 protected JCheckBox prefCheckValues;
142 protected JCheckBox prefCheckComplex;
143 protected JCheckBox prefCheckFixmes;
144 protected JCheckBox prefCheckPaint;
145
146 protected JCheckBox prefCheckKeysBeforeUpload;
147 protected JCheckBox prefCheckValuesBeforeUpload;
148 protected JCheckBox prefCheckComplexBeforeUpload;
149 protected JCheckBox prefCheckFixmesBeforeUpload;
150 protected JCheckBox prefCheckPaintBeforeUpload;
151
152 // CHECKSTYLE.OFF: SingleSpaceSeparator
153 protected static final int EMPTY_VALUES = 1200;
154 protected static final int INVALID_KEY = 1201;
155 protected static final int INVALID_VALUE = 1202;
156 protected static final int FIXME = 1203;
157 protected static final int INVALID_SPACE = 1204;
158 protected static final int INVALID_KEY_SPACE = 1205;
159 protected static final int INVALID_HTML = 1206; /* 1207 was PAINT */
160 protected static final int LONG_VALUE = 1208;
161 protected static final int LONG_KEY = 1209;
162 protected static final int LOW_CHAR_VALUE = 1210;
163 protected static final int LOW_CHAR_KEY = 1211;
164 protected static final int MISSPELLED_VALUE = 1212;
165 protected static final int MISSPELLED_KEY = 1213;
166 protected static final int MULTIPLE_SPACES = 1214;
167 protected static final int MISSPELLED_VALUE_NO_FIX = 1215;
168 protected static final int UNUSUAL_UNICODE_CHAR_VALUE = 1216;
169 // CHECKSTYLE.ON: SingleSpaceSeparator
170
171 protected EditableList sourcesList;
172
173 private static final List<String> DEFAULT_SOURCES = Arrays.asList(IGNORE_FILE, SPELL_FILE);
174
/**
 * Constructs a new {@code TagChecker} with its translated name and description.
 */
public TagChecker() {
    super(
            tr("Tag checker"),
            tr("This test checks for errors in tag keys and values."));
}
181
182 @Override
183 public void initialize() throws IOException {
184 initializeData();
185 initializePresets();
186 analysePresets();
187 }
188
189 /**
190 * Add presets that contain only numerical values to the ignore list
191 */
192 private static void analysePresets() {
193 for (String key : TaggingPresets.getPresetKeys()) {
194 if (isKeyIgnored(key))
195 continue;
196 boolean allNumerical = true;
197 Set<String> values = TaggingPresets.getPresetValues(key);
198 if (values.isEmpty())
199 allNumerical = false;
200 for (String val : values) {
201 if (!isNum(val)) {
202 allNumerical = false;
203 break;
204 }
205 }
206 if (allNumerical) {
207 ignoreForLevenshtein.add(key);
208 }
209 }
210 }
211
212 /**
213 * Reads the spell-check file into a HashMap.
214 * The data file is a list of words, beginning with +/-. If it starts with +,
215 * the word is valid, but if it starts with -, the word should be replaced
216 * by the nearest + word before this.
217 *
218 * @throws IOException if any I/O error occurs
219 */
220 private static void initializeData() throws IOException {
221 ignoreDataStartsWith.clear();
222 ignoreDataEquals.clear();
223 ignoreDataEndsWith.clear();
224 ignoreDataTag.clear();
225 harmonizedKeys.clear();
226 ignoreForLevenshtein.clear();
227 oftenUsedTags.clear();
228
229 StringBuilder errorSources = new StringBuilder();
230 for (String source : Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES)) {
231 try (
232 CachedFile cf = new CachedFile(source);
233 BufferedReader reader = cf.getContentReader()
234 ) {
235 String okValue = null;
236 boolean tagcheckerfile = false;
237 boolean ignorefile = false;
238 boolean isFirstLine = true;
239 String line;
240 while ((line = reader.readLine()) != null) {
241 if (line.isEmpty()) {
242 // ignore
243 } else if (line.startsWith("#")) {
244 if (line.startsWith("# JOSM TagChecker")) {
245 tagcheckerfile = true;
246 Logging.error(tr("Ignoring {0}. Support was dropped", source));
247 } else
248 if (line.startsWith("# JOSM IgnoreTags")) {
249 ignorefile = true;
250 if (!DEFAULT_SOURCES.contains(source)) {
251 Logging.info(tr("Adding {0} to ignore tags", source));
252 }
253 }
254 } else if (ignorefile) {
255 parseIgnoreFileLine(source, line);
256 } else if (tagcheckerfile) {
257 // ignore
258 } else if (line.charAt(0) == '+') {
259 okValue = line.substring(1);
260 } else if (line.charAt(0) == '-' && okValue != null) {
261 String hk = harmonizeKey(line.substring(1));
262 if (!okValue.equals(hk) && harmonizedKeys.put(hk, okValue) != null) {
263 Logging.debug(tr("Line was ignored: {0}", line));
264 }
265 } else {
266 Logging.error(tr("Invalid spellcheck line: {0}", line));
267 }
268 if (isFirstLine) {
269 isFirstLine = false;
270 if (!(tagcheckerfile || ignorefile) && !DEFAULT_SOURCES.contains(source)) {
271 Logging.info(tr("Adding {0} to spellchecker", source));
272 }
273 }
274 }
275 } catch (IOException e) {
276 Logging.error(e);
277 errorSources.append(source).append('\n');
278 }
279 }
280
281 if (errorSources.length() > 0)
282 throw new IOException(tr("Could not access data file(s):\n{0}", errorSources));
283 }
284
285 /**
286 * Parse a line found in a configuration file
287 * @param source name of configuration file
288 * @param line the line to parse
289 */
290 private static void parseIgnoreFileLine(String source, String line) {
291 line = line.trim();
292 if (line.length() < 4) {
293 return;
294 }
295 try {
296 String key = line.substring(0, 2);
297 line = line.substring(2);
298
299 switch (key) {
300 case "S:":
301 ignoreDataStartsWith.add(line);
302 break;
303 case "E:":
304 ignoreDataEquals.add(line);
305 addToKeyDictionary(line);
306 break;
307 case "F:":
308 ignoreDataEndsWith.add(line);
309 break;
310 case "K:":
311 Tag tag = Tag.ofString(line);
312 ignoreDataTag.add(tag);
313 oftenUsedTags.put(tag.getKey(), tag.getValue());
314 addToKeyDictionary(tag.getKey());
315 break;
316 default:
317 if (!key.startsWith(";")) {
318 Logging.warn("Unsupported TagChecker key: " + key);
319 }
320 }
321 } catch (IllegalArgumentException e) {
322 Logging.error("Invalid line in {0} : {1}", source, e.getMessage());
323 Logging.trace(e);
324 }
325 }
326
327 private static void addToKeyDictionary(String key) {
328 if (key != null) {
329 String hk = harmonizeKey(key);
330 if (!key.equals(hk)) {
331 harmonizedKeys.put(hk, key);
332 }
333 }
334 }
335
336 /**
337 * Reads the presets data.
338 *
339 */
340 public static void initializePresets() {
341
342 if (!Config.getPref().getBoolean(PREF_CHECK_VALUES, true))
343 return;
344
345 Collection<TaggingPreset> presets = TaggingPresets.getTaggingPresets();
346 if (!presets.isEmpty()) {
347 initAdditionalPresetsValueData();
348 for (TaggingPreset p : presets) {
349 for (TaggingPresetItem i : p.data) {
350 if (i instanceof KeyedItem) {
351 addPresetValue((KeyedItem) i);
352 } else if (i instanceof CheckGroup) {
353 for (Check c : ((CheckGroup) i).checks) {
354 addPresetValue(c);
355 }
356 }
357 }
358 }
359 }
360 }
361
362 private static void initAdditionalPresetsValueData() {
363 additionalPresetsValueData = new HashSet<>();
364 for (String a : AbstractPrimitive.getUninterestingKeys()) {
365 additionalPresetsValueData.add(a);
366 }
367 for (String a : Config.getPref().getList(ValidatorPrefHelper.PREFIX + ".knownkeys",
368 Arrays.asList("is_in", "int_ref", "fixme", "population"))) {
369 additionalPresetsValueData.add(a);
370 }
371 }
372
373 private static void addPresetValue(KeyedItem ky) {
374 if (ky.key != null && ky.getValues() != null) {
375 addToKeyDictionary(ky.key);
376 }
377 }
378
379 /**
380 * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters)
381 * @param s string to check
382 * @return {@code true} if {@code s} contains non-printing control characters
383 */
384 static boolean containsUnwantedNonPrintingControlCharacter(String s) {
385 return s != null && !s.isEmpty() && (
386 isJoiningChar(s.charAt(0)) ||
387 isJoiningChar(s.charAt(s.length() - 1)) ||
388 s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c))
389 );
390 }
391
392 private static boolean isAsciiControlChar(int c) {
393 return c < 0x20 || c == 0x7F;
394 }
395
396 private static boolean isNewLineChar(int c) {
397 return c == 0x0a || c == 0x0d;
398 }
399
400 private static boolean isJoiningChar(int c) {
401 return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ
402 }
403
404 private static boolean isBidiControlChar(int c) {
405 /* check for range 0x200e to 0x200f (LRM, RLM) or
406 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
407 return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e);
408 }
409
410 static String removeUnwantedNonPrintingControlCharacters(String s) {
411 // Remove all unwanted characters
412 String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
413 // Remove joining characters located at the beginning of the string
414 while (!result.isEmpty() && isJoiningChar(result.charAt(0))) {
415 result = result.substring(1);
416 }
417 // Remove joining characters located at the end of the string
418 while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) {
419 result = result.substring(0, result.length() - 1);
420 }
421 return result;
422 }
423
424 private static boolean containsUnusualUnicodeCharacter(String key, String value) {
425 return value != null && value.chars().anyMatch(c -> isUnusualUnicodeBlock(key, UnicodeBlock.of(c)));
426 }
427
428 /**
429 * Detects highly suspicious Unicode characters that have been seen in OSM database.
430 * @param key tag key
431 * @param b Unicode block of the current character
432 * @return {@code true} if the current unicode block is very unusual for the given key
433 */
434 private static boolean isUnusualUnicodeBlock(String key, UnicodeBlock b) {
435 return isUnusualPhoneticUse(key, b) || isUnusualBmpUse(b) || isUnusualSmpUse(b);
436 }
437
438 private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b) {
439 return (b == UnicodeBlock.IPA_EXTENSIONS // U+0250..U+02AF
440 || b == UnicodeBlock.PHONETIC_EXTENSIONS // U+1D00..U+1D7F
441 || b == UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT) // U+1D80..U+1DBF
442 && !key.endsWith(":pronunciation");
443 }
444
445 private static boolean isUnusualBmpUse(UnicodeBlock b) {
446 // CHECKSTYLE.OFF: BooleanExpressionComplexity
447 return b == UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS // U+20D0..U+20FF
448 || b == UnicodeBlock.ARROWS // U+2190..U+21FF
449 || b == UnicodeBlock.MATHEMATICAL_OPERATORS // U+2200..U+22FF
450 || b == UnicodeBlock.ENCLOSED_ALPHANUMERICS // U+2460..U+24FF
451 || b == UnicodeBlock.BOX_DRAWING // U+2500..U+257F
452 || b == UnicodeBlock.GEOMETRIC_SHAPES // U+25A0..U+25FF
453 || b == UnicodeBlock.DINGBATS // U+2700..U+27BF
454 || b == UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS // U+2B00..U+2BFF
455 || b == UnicodeBlock.GLAGOLITIC // U+2C00..U+2C5F
456 || b == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO // U+3130..U+318F
457 || b == UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS // U+3200..U+32FF
458 || b == UnicodeBlock.LATIN_EXTENDED_D // U+A720..U+A7FF
459 || b == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS // U+F900..U+FAFF
460 || b == UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS // U+FB00..U+FB4F
461 || b == UnicodeBlock.VARIATION_SELECTORS // U+FE00..U+FE0F
462 || b == UnicodeBlock.SPECIALS; // U+FFF0..U+FFFF
463 // CHECKSTYLE.ON: BooleanExpressionComplexity
464 }
465
466 private static boolean isUnusualSmpUse(UnicodeBlock b) {
467 // UnicodeBlock.SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS is only defined in Java 9+
468 return b == UnicodeBlock.MUSICAL_SYMBOLS // U+1D100..U+1D1FF
469 || b == UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT // U+1F100..U+1F1FF
470 || b == UnicodeBlock.EMOTICONS // U+1F600..U+1F64F
471 || b == UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS; // U+1F680..U+1F6FF
472 }
473
474 /**
475 * Get set of preset values for the given key.
476 * @param key the key
477 * @return null if key is not in presets or in additionalPresetsValueData,
478 * else a set which might be empty.
479 */
480 private static Set<String> getPresetValues(String key) {
481 Set<String> res = TaggingPresets.getPresetValues(key);
482 if (res != null)
483 return res;
484 if (additionalPresetsValueData.contains(key))
485 return Collections.emptySet();
486 // null means key is not known
487 return null;
488 }
489
490 /**
491 * Determines if the given key is in internal presets.
492 * @param key key
493 * @return {@code true} if the given key is in internal presets
494 * @since 9023
495 */
496 public static boolean isKeyInPresets(String key) {
497 return TaggingPresets.getPresetValues(key) != null;
498 }
499
500 /**
501 * Determines if the given tag is in internal presets.
502 * @param key key
503 * @param value value
504 * @return {@code true} if the given tag is in internal presets
505 * @since 9023
506 */
507 public static boolean isTagInPresets(String key, String value) {
508 final Set<String> values = getPresetValues(key);
509 return values != null && values.contains(value);
510 }
511
512 /**
513 * Returns the list of ignored tags.
514 * @return the list of ignored tags
515 * @since 9023
516 */
517 public static List<Tag> getIgnoredTags() {
518 return new ArrayList<>(ignoreDataTag);
519 }
520
521 /**
522 * Determines if the given tag key is ignored for checks "key/tag not in presets".
523 * @param key key
524 * @return true if the given key is ignored
525 */
526 private static boolean isKeyIgnored(String key) {
527 if (ignoreDataEquals.contains(key)) {
528 return true;
529 }
530 for (String a : ignoreDataStartsWith) {
531 if (key.startsWith(a)) {
532 return true;
533 }
534 }
535 for (String a : ignoreDataEndsWith) {
536 if (key.endsWith(a)) {
537 return true;
538 }
539 }
540 return false;
541 }
542
543 /**
544 * Determines if the given tag is ignored for checks "key/tag not in presets".
545 * @param key key
546 * @param value value
547 * @return {@code true} if the given tag is ignored
548 * @since 9023
549 */
550 public static boolean isTagIgnored(String key, String value) {
551 if (isKeyIgnored(key))
552 return true;
553 final Set<String> values = getPresetValues(key);
554 if (values != null && values.isEmpty())
555 return true;
556 if (!isTagInPresets(key, value)) {
557 for (Tag a : ignoreDataTag) {
558 if (key.equals(a.getKey()) && value.equals(a.getValue())) {
559 return true;
560 }
561 }
562 }
563 return false;
564 }
565
566 /**
567 * Checks the primitive tags
568 * @param p The primitive to check
569 */
570 @Override
571 public void check(OsmPrimitive p) {
572 if (!p.isTagged())
573 return;
574
575 // Just a collection to know if a primitive has been already marked with error
576 MultiMap<OsmPrimitive, String> withErrors = new MultiMap<>();
577
578 for (Entry<String, String> prop : p.getKeys().entrySet()) {
579 String s = marktr("Tag ''{0}'' invalid.");
580 String key = prop.getKey();
581 String value = prop.getValue();
582
583 if (checkKeys) {
584 checkSingleTagKeySimple(withErrors, p, s, key);
585 }
586 if (checkValues) {
587 checkSingleTagValueSimple(withErrors, p, s, key, value);
588 checkSingleTagComplex(withErrors, p, key, value);
589 }
590 if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) {
591 errors.add(TestError.builder(this, Severity.OTHER, FIXME)
592 .message(tr("FIXMES"))
593 .primitives(p)
594 .build());
595 withErrors.put(p, "FIXME");
596 }
597 }
598 }
599
600 private void checkSingleTagValueSimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key, String value) {
601 if (!checkValues || value == null)
602 return;
603 if ((containsUnwantedNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) {
604 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE)
605 .message(tr("Tag value contains non-printing character"), s, key)
606 .primitives(p)
607 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value)))
608 .build());
609 withErrors.put(p, "ICV");
610 }
611 if ((containsUnusualUnicodeCharacter(key, value)) && !withErrors.contains(p, "UUCV")) {
612 errors.add(TestError.builder(this, Severity.WARNING, UNUSUAL_UNICODE_CHAR_VALUE)
613 .message(tr("Tag value contains unusual Unicode character"), s, key)
614 .primitives(p)
615 .build());
616 withErrors.put(p, "UUCV");
617 }
618 if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) {
619 errors.add(TestError.builder(this, Severity.ERROR, LONG_VALUE)
620 .message(tr("Tag value longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, value.length()), s, key)
621 .primitives(p)
622 .build());
623 withErrors.put(p, "LV");
624 }
625 if ((value.trim().isEmpty()) && !withErrors.contains(p, "EV")) {
626 errors.add(TestError.builder(this, Severity.WARNING, EMPTY_VALUES)
627 .message(tr("Tags with empty values"), s, key)
628 .primitives(p)
629 .build());
630 withErrors.put(p, "EV");
631 }
632 final String errTypeSpace = "SPACE";
633 if ((value.startsWith(" ") || value.endsWith(" ")) && !withErrors.contains(p, errTypeSpace)) {
634 errors.add(TestError.builder(this, Severity.WARNING, INVALID_SPACE)
635 .message(tr("Property values start or end with white space"), s, key)
636 .primitives(p)
637 .build());
638 withErrors.put(p, errTypeSpace);
639 }
640 if (value.contains(" ") && !withErrors.contains(p, errTypeSpace)) {
641 errors.add(TestError.builder(this, Severity.WARNING, MULTIPLE_SPACES)
642 .message(tr("Property values contain multiple white spaces"), s, key)
643 .primitives(p)
644 .build());
645 withErrors.put(p, errTypeSpace);
646 }
647 if (!value.equals(Entities.unescape(value)) && !withErrors.contains(p, "HTML")) {
648 errors.add(TestError.builder(this, Severity.OTHER, INVALID_HTML)
649 .message(tr("Property values contain HTML entity"), s, key)
650 .primitives(p)
651 .build());
652 withErrors.put(p, "HTML");
653 }
654 }
655
656 private void checkSingleTagKeySimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key) {
657 if (!checkKeys || key == null)
658 return;
659 if ((containsUnwantedNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) {
660 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY)
661 .message(tr("Tag key contains non-printing character"), s, key)
662 .primitives(p)
663 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(key)))
664 .build());
665 withErrors.put(p, "ICK");
666 }
667 if (key.length() > Tagged.MAX_TAG_LENGTH && !withErrors.contains(p, "LK")) {
668 errors.add(TestError.builder(this, Severity.ERROR, LONG_KEY)
669 .message(tr("Tag key longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, key.length()), s, key)
670 .primitives(p)
671 .build());
672 withErrors.put(p, "LK");
673 }
674 if (key.indexOf(' ') >= 0 && !withErrors.contains(p, "IPK")) {
675 errors.add(TestError.builder(this, Severity.WARNING, INVALID_KEY_SPACE)
676 .message(tr("Invalid white space in property key"), s, key)
677 .primitives(p)
678 .build());
679 withErrors.put(p, "IPK");
680 }
681 }
682
683 private void checkSingleTagComplex(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key, String value) {
684 if (!checkValues || key == null || value == null || value.isEmpty())
685 return;
686 if (additionalPresetsValueData != null && !isTagIgnored(key, value)) {
687 if (!isKeyInPresets(key)) {
688 spellCheckKey(withErrors, p, key);
689 } else if (!isTagInPresets(key, value)) {
690 if (oftenUsedTags.contains(key, value)) {
691 // tag is quite often used but not in presets
692 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
693 .message(tr("Presets do not contain property value"),
694 marktr("Value ''{0}'' for key ''{1}'' not in presets, but is known."), value, key)
695 .primitives(p)
696 .build());
697 withErrors.put(p, "UPV");
698 } else {
699 tryGuess(p, key, value, withErrors);
700 }
701 }
702 }
703 }
704
705 private void spellCheckKey(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key) {
706 String prettifiedKey = harmonizeKey(key);
707 String fixedKey;
708 if (ignoreDataEquals.contains(prettifiedKey)) {
709 fixedKey = prettifiedKey;
710 } else {
711 fixedKey = isKeyInPresets(prettifiedKey) ? prettifiedKey : harmonizedKeys.get(prettifiedKey);
712 }
713 if (fixedKey == null) {
714 for (Tag a : ignoreDataTag) {
715 if (a.getKey().equals(prettifiedKey)) {
716 fixedKey = prettifiedKey;
717 break;
718 }
719 }
720 }
721
722 if (fixedKey != null && !"".equals(fixedKey) && !fixedKey.equals(key)) {
723 final String proposedKey = fixedKey;
724 // misspelled preset key
725 final TestError.Builder error = TestError.builder(this, Severity.WARNING, MISSPELLED_KEY)
726 .message(tr("Misspelled property key"), marktr("Key ''{0}'' looks like ''{1}''."), key, proposedKey)
727 .primitives(p);
728 if (p.hasKey(fixedKey)) {
729 errors.add(error.build());
730 } else {
731 errors.add(error.fix(() -> new ChangePropertyKeyCommand(p, key, proposedKey)).build());
732 }
733 withErrors.put(p, "WPK");
734 } else {
735 errors.add(TestError.builder(this, Severity.OTHER, INVALID_KEY)
736 .message(tr("Presets do not contain property key"), marktr("Key ''{0}'' not in presets."), key)
737 .primitives(p)
738 .build());
739 withErrors.put(p, "UPK");
740 }
741 }
742
743 private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) {
744 // try to fix common typos and check again if value is still unknown
745 final String harmonizedValue = harmonizeValue(value);
746 if (harmonizedValue == null || harmonizedValue.isEmpty())
747 return;
748 String fixedValue = null;
749 List<Set<String>> sets = new ArrayList<>();
750 Set<String> presetValues = getPresetValues(key);
751 if (presetValues != null)
752 sets.add(presetValues);
753 Set<String> usedValues = oftenUsedTags.get(key);
754 if (usedValues != null)
755 sets.add(usedValues);
756 for (Set<String> possibleValues: sets) {
757 if (possibleValues.contains(harmonizedValue)) {
758 fixedValue = harmonizedValue;
759 break;
760 }
761 }
762 if (fixedValue == null && !ignoreForLevenshtein.contains(key)) {
763 int maxPresetValueLen = 0;
764 List<String> fixVals = new ArrayList<>();
765 // use Levenshtein distance to find typical typos
766 int minDist = MAX_LEVENSHTEIN_DISTANCE + 1;
767 String closest = null;
768 for (Set<String> possibleValues: sets) {
769 for (String possibleVal : possibleValues) {
770 if (possibleVal.isEmpty())
771 continue;
772 maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length());
773 if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) {
774 // don't suggest fix value when given value is short and lengths are too different
775 // for example surface=u would result in surface=mud
776 continue;
777 }
778 int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue);
779 if (dist >= harmonizedValue.length()) {
780 // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'.
781 continue;
782 }
783 if (dist < minDist) {
784 closest = possibleVal;
785 minDist = dist;
786 fixVals.clear();
787 fixVals.add(possibleVal);
788 } else if (dist == minDist) {
789 fixVals.add(possibleVal);
790 }
791 }
792 }
793
794 if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE
795 && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) {
796 if (fixVals.size() < 2) {
797 fixedValue = closest;
798 } else {
799 Collections.sort(fixVals);
800 // misspelled preset value with multiple good alternatives
801 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX)
802 .message(tr("Unknown property value"),
803 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"),
804 value, key, fixVals)
805 .primitives(p).build());
806 withErrors.put(p, "WPV");
807 return;
808 }
809 }
810 }
811 if (fixedValue != null && !fixedValue.equals(value)) {
812 final String newValue = fixedValue;
813 // misspelled preset value
814 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE)
815 .message(tr("Unknown property value"),
816 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue)
817 .primitives(p)
818 .build());
819 withErrors.put(p, "WPV");
820 } else {
821 // unknown preset value
822 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
823 .message(tr("Presets do not contain property value"),
824 marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key)
825 .primitives(p)
826 .build());
827 withErrors.put(p, "UPV");
828 }
829 }
830
831 private static boolean isNum(String harmonizedValue) {
832 try {
833 Double.parseDouble(harmonizedValue);
834 return true;
835 } catch (NumberFormatException e) {
836 return false;
837 }
838 }
839
840 private static boolean isFixme(String key, String value) {
841 return key.toLowerCase(Locale.ENGLISH).contains("fixme") || key.contains("todo")
842 || value.toLowerCase(Locale.ENGLISH).contains("fixme") || value.contains("check and delete");
843 }
844
845 private static String harmonizeKey(String key) {
846 return Utils.strip(key.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(':', '_').replace(' ', '_'), "-_;:,");
847 }
848
849 private static String harmonizeValue(String value) {
850 return Utils.strip(value.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(' ', '_'), "-_;:,");
851 }
852
853 @Override
854 public void startTest(ProgressMonitor monitor) {
855 super.startTest(monitor);
856 checkKeys = Config.getPref().getBoolean(PREF_CHECK_KEYS, true);
857 if (isBeforeUpload) {
858 checkKeys = checkKeys && Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true);
859 }
860
861 checkValues = Config.getPref().getBoolean(PREF_CHECK_VALUES, true);
862 if (isBeforeUpload) {
863 checkValues = checkValues && Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true);
864 }
865
866 checkComplex = Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true);
867 if (isBeforeUpload) {
868 checkComplex = checkComplex && Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true);
869 }
870
871 checkFixmes = Config.getPref().getBoolean(PREF_CHECK_FIXMES, true);
872 if (isBeforeUpload) {
873 checkFixmes = checkFixmes && Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true);
874 }
875 }
876
877 @Override
878 public void visit(Collection<OsmPrimitive> selection) {
879 if (checkKeys || checkValues || checkComplex || checkFixmes) {
880 super.visit(selection);
881 }
882 }
883
884 @Override
885 public void addGui(JPanel testPanel) {
886 GBC a = GBC.eol();
887 a.anchor = GridBagConstraints.EAST;
888
889 testPanel.add(new JLabel(name+" :"), GBC.eol().insets(3, 0, 0, 0));
890
891 prefCheckKeys = new JCheckBox(tr("Check property keys."), Config.getPref().getBoolean(PREF_CHECK_KEYS, true));
892 prefCheckKeys.setToolTipText(tr("Validate that property keys are valid checking against list of words."));
893 testPanel.add(prefCheckKeys, GBC.std().insets(20, 0, 0, 0));
894
895 prefCheckKeysBeforeUpload = new JCheckBox();
896 prefCheckKeysBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true));
897 testPanel.add(prefCheckKeysBeforeUpload, a);
898
899 prefCheckComplex = new JCheckBox(tr("Use complex property checker."), Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true));
900 prefCheckComplex.setToolTipText(tr("Validate property values and tags using complex rules."));
901 testPanel.add(prefCheckComplex, GBC.std().insets(20, 0, 0, 0));
902
903 prefCheckComplexBeforeUpload = new JCheckBox();
904 prefCheckComplexBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true));
905 testPanel.add(prefCheckComplexBeforeUpload, a);
906
907 final Collection<String> sources = Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES);
908 sourcesList = new EditableList(tr("TagChecker source"));
909 sourcesList.setItems(sources);
910 testPanel.add(new JLabel(tr("Data sources ({0})", "*.cfg")), GBC.eol().insets(23, 0, 0, 0));
911 testPanel.add(sourcesList, GBC.eol().fill(GridBagConstraints.HORIZONTAL).insets(23, 0, 0, 0));
912
913 ActionListener disableCheckActionListener = e -> handlePrefEnable();
914 prefCheckKeys.addActionListener(disableCheckActionListener);
915 prefCheckKeysBeforeUpload.addActionListener(disableCheckActionListener);
916 prefCheckComplex.addActionListener(disableCheckActionListener);
917 prefCheckComplexBeforeUpload.addActionListener(disableCheckActionListener);
918
919 handlePrefEnable();
920
921 prefCheckValues = new JCheckBox(tr("Check property values."), Config.getPref().getBoolean(PREF_CHECK_VALUES, true));
922 prefCheckValues.setToolTipText(tr("Validate that property values are valid checking against presets."));
923 testPanel.add(prefCheckValues, GBC.std().insets(20, 0, 0, 0));
924
925 prefCheckValuesBeforeUpload = new JCheckBox();
926 prefCheckValuesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true));
927 testPanel.add(prefCheckValuesBeforeUpload, a);
928
929 prefCheckFixmes = new JCheckBox(tr("Check for FIXMES."), Config.getPref().getBoolean(PREF_CHECK_FIXMES, true));
930 prefCheckFixmes.setToolTipText(tr("Looks for nodes or ways with FIXME in any property value."));
931 testPanel.add(prefCheckFixmes, GBC.std().insets(20, 0, 0, 0));
932
933 prefCheckFixmesBeforeUpload = new JCheckBox();
934 prefCheckFixmesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true));
935 testPanel.add(prefCheckFixmesBeforeUpload, a);
936 }
937
938 /**
939 * Enables/disables the source list field
940 */
941 public void handlePrefEnable() {
942 boolean selected = prefCheckKeys.isSelected() || prefCheckKeysBeforeUpload.isSelected()
943 || prefCheckComplex.isSelected() || prefCheckComplexBeforeUpload.isSelected();
944 sourcesList.setEnabled(selected);
945 }
946
947 @Override
948 public boolean ok() {
949 enabled = prefCheckKeys.isSelected() || prefCheckValues.isSelected() || prefCheckComplex.isSelected() || prefCheckFixmes.isSelected();
950 testBeforeUpload = prefCheckKeysBeforeUpload.isSelected() || prefCheckValuesBeforeUpload.isSelected()
951 || prefCheckFixmesBeforeUpload.isSelected() || prefCheckComplexBeforeUpload.isSelected();
952
953 Config.getPref().putBoolean(PREF_CHECK_VALUES, prefCheckValues.isSelected());
954 Config.getPref().putBoolean(PREF_CHECK_COMPLEX, prefCheckComplex.isSelected());
955 Config.getPref().putBoolean(PREF_CHECK_KEYS, prefCheckKeys.isSelected());
956 Config.getPref().putBoolean(PREF_CHECK_FIXMES, prefCheckFixmes.isSelected());
957 Config.getPref().putBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, prefCheckValuesBeforeUpload.isSelected());
958 Config.getPref().putBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, prefCheckComplexBeforeUpload.isSelected());
959 Config.getPref().putBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, prefCheckKeysBeforeUpload.isSelected());
960 Config.getPref().putBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, prefCheckFixmesBeforeUpload.isSelected());
961 return Config.getPref().putList(PREF_SOURCES, sourcesList.getItems());
962 }
963
964 @Override
965 public Command fixError(TestError testError) {
966 List<Command> commands = new ArrayList<>(50);
967
968 Collection<? extends OsmPrimitive> primitives = testError.getPrimitives();
969 for (OsmPrimitive p : primitives) {
970 Map<String, String> tags = p.getKeys();
971 if (tags.isEmpty()) {
972 continue;
973 }
974
975 for (Entry<String, String> prop: tags.entrySet()) {
976 String key = prop.getKey();
977 String value = prop.getValue();
978 if (value == null || value.trim().isEmpty()) {
979 commands.add(new ChangePropertyCommand(p, key, null));
980 } else if (value.startsWith(" ") || value.endsWith(" ") || value.contains(" ")) {
981 commands.add(new ChangePropertyCommand(p, key, Utils.removeWhiteSpaces(value)));
982 } else if (key.startsWith(" ") || key.endsWith(" ") || key.contains(" ")) {
983 commands.add(new ChangePropertyKeyCommand(p, key, Utils.removeWhiteSpaces(key)));
984 } else {
985 String evalue = Entities.unescape(value);
986 if (!evalue.equals(value)) {
987 commands.add(new ChangePropertyCommand(p, key, evalue));
988 }
989 }
990 }
991 }
992
993 if (commands.isEmpty())
994 return null;
995 if (commands.size() == 1)
996 return commands.get(0);
997
998 return new SequenceCommand(tr("Fix tags"), commands);
999 }
1000
1001 @Override
1002 public boolean isFixable(TestError testError) {
1003 if (testError.getTester() instanceof TagChecker) {
1004 int code = testError.getCode();
1005 return code == EMPTY_VALUES || code == INVALID_SPACE ||
1006 code == INVALID_KEY_SPACE || code == INVALID_HTML ||
1007 code == MULTIPLE_SPACES;
1008 }
1009
1010 return false;
1011 }
1012}
Note: See TracBrowser for help on using the repository browser.