source: josm/trunk/src/org/openstreetmap/josm/data/validation/tests/TagChecker.java

Last change on this file was 18984, checked in by GerdP, 2 months ago

see #17035: enable counter that shows "{0} of {1} elements done" for the tested OSM elements also in TagChecker

  • Property svn:eol-style set to native
File size: 66.7 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.data.validation.tests;
3
4import static org.openstreetmap.josm.tools.I18n.marktr;
5import static org.openstreetmap.josm.tools.I18n.tr;
6import static org.openstreetmap.josm.tools.I18n.trn;
7
8import java.awt.GridBagConstraints;
9import java.awt.event.ActionListener;
10import java.io.BufferedReader;
11import java.io.IOException;
12import java.lang.Character.UnicodeBlock;
13import java.util.ArrayList;
14import java.util.Arrays;
15import java.util.Collection;
16import java.util.Collections;
17import java.util.EnumSet;
18import java.util.HashMap;
19import java.util.HashSet;
20import java.util.Iterator;
21import java.util.LinkedHashMap;
22import java.util.LinkedHashSet;
23import java.util.List;
24import java.util.Locale;
25import java.util.Map;
26import java.util.Map.Entry;
27import java.util.Objects;
28import java.util.OptionalInt;
29import java.util.Set;
30import java.util.regex.Pattern;
31import java.util.stream.Collectors;
32
33import javax.swing.JCheckBox;
34import javax.swing.JLabel;
35import javax.swing.JPanel;
36
37import org.openstreetmap.josm.command.ChangePropertyCommand;
38import org.openstreetmap.josm.command.ChangePropertyKeyCommand;
39import org.openstreetmap.josm.command.Command;
40import org.openstreetmap.josm.command.SequenceCommand;
41import org.openstreetmap.josm.data.coor.LatLon;
42import org.openstreetmap.josm.data.osm.AbstractPrimitive;
43import org.openstreetmap.josm.data.osm.DataSet;
44import org.openstreetmap.josm.data.osm.Node;
45import org.openstreetmap.josm.data.osm.OsmPrimitive;
46import org.openstreetmap.josm.data.osm.OsmUtils;
47import org.openstreetmap.josm.data.osm.Relation;
48import org.openstreetmap.josm.data.osm.RelationMember;
49import org.openstreetmap.josm.data.osm.Tag;
50import org.openstreetmap.josm.data.osm.TagMap;
51import org.openstreetmap.josm.data.osm.Tagged;
52import org.openstreetmap.josm.data.osm.Way;
53import org.openstreetmap.josm.data.osm.visitor.MergeSourceBuildingVisitor;
54import org.openstreetmap.josm.data.preferences.sources.ValidatorPrefHelper;
55import org.openstreetmap.josm.data.validation.OsmValidator;
56import org.openstreetmap.josm.data.validation.Severity;
57import org.openstreetmap.josm.data.validation.Test.TagTest;
58import org.openstreetmap.josm.data.validation.TestError;
59import org.openstreetmap.josm.data.validation.util.Entities;
60import org.openstreetmap.josm.gui.progress.ProgressMonitor;
61import org.openstreetmap.josm.gui.tagging.presets.TaggingPreset;
62import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetItem;
63import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetListener;
64import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetType;
65import org.openstreetmap.josm.gui.tagging.presets.TaggingPresets;
66import org.openstreetmap.josm.gui.tagging.presets.items.Check;
67import org.openstreetmap.josm.gui.tagging.presets.items.CheckGroup;
68import org.openstreetmap.josm.gui.tagging.presets.items.ComboMultiSelect;
69import org.openstreetmap.josm.gui.tagging.presets.items.Key;
70import org.openstreetmap.josm.gui.tagging.presets.items.KeyedItem;
71import org.openstreetmap.josm.gui.tagging.presets.items.PresetListEntry;
72import org.openstreetmap.josm.gui.tagging.presets.items.RegionSpecific;
73import org.openstreetmap.josm.gui.widgets.EditableList;
74import org.openstreetmap.josm.io.CachedFile;
75import org.openstreetmap.josm.spi.preferences.Config;
76import org.openstreetmap.josm.tools.GBC;
77import org.openstreetmap.josm.tools.JosmRuntimeException;
78import org.openstreetmap.josm.tools.Logging;
79import org.openstreetmap.josm.tools.MultiMap;
80import org.openstreetmap.josm.tools.Territories;
81import org.openstreetmap.josm.tools.Utils;
82
83/**
84 * Check for misspelled or wrong tags
85 *
86 * @author frsantos
87 * @since 3669
88 */
89public class TagChecker extends TagTest implements TaggingPresetListener {
90
91 /** The config file of ignored tags */
92 public static final String IGNORE_FILE = "resource://data/validator/ignoretags.cfg";
93 /** The config file of dictionary words */
94 public static final String SPELL_FILE = "resource://data/validator/words.cfg";
95
96 /** Normalized keys: the key should be substituted by the value if the key was not found in presets */
97 private static final Map<String, String> harmonizedKeys = new HashMap<>();
98 /** The spell check preset values which are not stored in TaggingPresets */
99 private static volatile HashSet<String> additionalPresetsValueData;
100 /** often used tags which are not in presets */
101 private static final MultiMap<String, String> oftenUsedTags = new MultiMap<>();
/** Presets mapped to their keyed items whose {@code match} is not "none"; filled by {@code initializePresets()} and read by {@code check} to find the presets matching a primitive */
102 private static final Map<TaggingPreset, List<TaggingPresetItem>> presetIndex = new LinkedHashMap<>();
103
104 private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(
105 "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]");
106
107 /** The TagChecker data */
108 private static final List<String> ignoreDataStartsWith = new ArrayList<>();
109 private static final Set<String> ignoreDataEquals = new HashSet<>();
110 private static final List<String> ignoreDataEndsWith = new ArrayList<>();
111 private static final List<Tag> ignoreDataTag = new ArrayList<>();
112 /** tag keys that have only numerical values in the presets */
113 private static final Set<String> ignoreForLevenshtein = new HashSet<>();
114
115 /** tag keys that are allowed to be the same on a multipolygon and an outer way */
116 private static final Set<String> ignoreForOuterMPSameTagCheck = new HashSet<>();
117
118 /** The preferences prefix */
119 protected static final String PREFIX = ValidatorPrefHelper.PREFIX + "." + TagChecker.class.getSimpleName();
120
121 MapCSSTagChecker deprecatedChecker;
122
123 /**
124 * The preference key to check values
125 */
126 public static final String PREF_CHECK_VALUES = PREFIX + ".checkValues";
127 /**
128 * The preference key to check keys
129 */
130 public static final String PREF_CHECK_KEYS = PREFIX + ".checkKeys";
131 /**
132 * The preference key to enable complex checks
133 */
134 public static final String PREF_CHECK_COMPLEX = PREFIX + ".checkComplex";
135 /**
136 * The preference key to search for fixme tags
137 */
138 public static final String PREF_CHECK_FIXMES = PREFIX + ".checkFixmes";
139 /**
140 * The preference key to check presets
141 */
142 public static final String PREF_CHECK_PRESETS_TYPES = PREFIX + ".checkPresetsTypes";
/** The preference key to check region restrictions of presets (NOTE(review): presumably drives {@code checkRegions} - confirm where the preferences are read) */
143 public static final String PREF_CHECK_REGIONS = PREFIX + ".checkPresetsRegions";
144
145 /**
146 * The preference key for source files
147 * @see #DEFAULT_SOURCES
148 */
149 public static final String PREF_SOURCES = PREFIX + ".source";
150
151 private static final String BEFORE_UPLOAD = "BeforeUpload";
152 /**
153 * The preference key to check keys - used before upload
154 */
155 public static final String PREF_CHECK_KEYS_BEFORE_UPLOAD = PREF_CHECK_KEYS + BEFORE_UPLOAD;
156 /**
157 * The preference key to check values - used before upload
158 */
159 public static final String PREF_CHECK_VALUES_BEFORE_UPLOAD = PREF_CHECK_VALUES + BEFORE_UPLOAD;
160 /**
161 * The preference key to run complex tests - used before upload
162 */
163 public static final String PREF_CHECK_COMPLEX_BEFORE_UPLOAD = PREF_CHECK_COMPLEX + BEFORE_UPLOAD;
164 /**
165 * The preference key to search for fixmes - used before upload
166 */
167 public static final String PREF_CHECK_FIXMES_BEFORE_UPLOAD = PREF_CHECK_FIXMES + BEFORE_UPLOAD;
168 /**
169 * The preference key to search for presets - used before upload
170 */
171 public static final String PREF_CHECK_PRESETS_TYPES_BEFORE_UPLOAD = PREF_CHECK_PRESETS_TYPES + BEFORE_UPLOAD;
/** The preference key to check region restrictions of presets - used before upload (NOTE(review): usage is outside this view - confirm) */
172 public static final String PREF_CHECK_REGIONS_BEFORE_UPLOAD = PREF_CHECK_REGIONS + BEFORE_UPLOAD;
173
174 /**
175 * The preference key for the list of tag keys that are allowed to be the same on a multipolygon and an outer way
176 */
177 public static final String PREF_KEYS_IGNORE_OUTER_MP_SAME_TAG = PREFIX + ".ignore-keys-outer-mp-same-tag";
178
179 private static final int MAX_LEVENSHTEIN_DISTANCE = 2;
180
181 protected boolean includeOtherSeverity;
182
183 protected boolean checkKeys;
184 protected boolean checkValues;
185 /** Was used for special configuration file, might be used to disable value spell checker. */
186 protected boolean checkComplex;
187 protected boolean checkFixmes;
188 protected boolean checkPresetsTypes;
189 protected boolean checkRegions;
190
191 protected JCheckBox prefCheckKeys;
192 protected JCheckBox prefCheckValues;
193 protected JCheckBox prefCheckComplex;
194 protected JCheckBox prefCheckFixmes;
195 protected JCheckBox prefCheckPresetsTypes;
196 protected JCheckBox prefCheckRegions;
197
198 protected JCheckBox prefCheckKeysBeforeUpload;
199 protected JCheckBox prefCheckValuesBeforeUpload;
200 protected JCheckBox prefCheckComplexBeforeUpload;
201 protected JCheckBox prefCheckFixmesBeforeUpload;
202 protected JCheckBox prefCheckPresetsTypesBeforeUpload;
203 protected JCheckBox prefCheckRegionsBeforeUpload;
204
205 // CHECKSTYLE.OFF: SingleSpaceSeparator
// Error codes of this test; a code is passed to TestError.builder(...) to identify the kind of problem reported.
206 protected static final int EMPTY_VALUES = 1200;
207 protected static final int INVALID_KEY = 1201;
208 protected static final int INVALID_VALUE = 1202;
209 protected static final int FIXME = 1203;
210 protected static final int INVALID_SPACE = 1204;
211 protected static final int INVALID_KEY_SPACE = 1205;
212 protected static final int INVALID_HTML = 1206; /* 1207 was PAINT */
213 protected static final int LONG_VALUE = 1208;
214 protected static final int LONG_KEY = 1209;
215 protected static final int LOW_CHAR_VALUE = 1210;
216 protected static final int LOW_CHAR_KEY = 1211;
217 protected static final int MISSPELLED_VALUE = 1212;
218 protected static final int MISSPELLED_KEY = 1213;
219 protected static final int MULTIPLE_SPACES = 1214;
220 protected static final int MISSPELLED_VALUE_NO_FIX = 1215;
221 protected static final int UNUSUAL_UNICODE_CHAR_VALUE = 1216;
222 protected static final int INVALID_PRESETS_TYPE = 1217;
223 protected static final int MULTIPOLYGON_NO_AREA = 1218;
224 protected static final int MULTIPOLYGON_INCOMPLETE = 1219;
225 protected static final int MULTIPOLYGON_MAYBE_NO_AREA = 1220;
226 protected static final int MULTIPOLYGON_SAME_TAG_ON_OUTER = 1221;
227 protected static final int INVALID_REGION = 1222;
228 // CHECKSTYLE.ON: SingleSpaceSeparator
229
230 protected EditableList sourcesList;
231
232 private static final List<String> DEFAULT_SOURCES = Arrays.asList(IGNORE_FILE, SPELL_FILE);
233
/**
 * Creates the tag checker test, passing its translated name and description to the base test.
 */
public TagChecker() {
    super(
        tr("Tag checker"),
        tr("This test checks for errors in tag keys and values.")
    );
}
240
241 @Override
242 public void initialize() throws IOException {
243 TaggingPresets.addListener(this);
244 initializeData();
245 initializePresets();
246 analysePresets();
247 }
248
249 /**
250 * Add presets that contain only numerical values to the ignore list
251 */
252 private static void analysePresets() {
253 for (String key : TaggingPresets.getPresetKeys()) {
254 if (isKeyIgnored(key))
255 continue;
256 Set<String> values = TaggingPresets.getPresetValues(key);
257 boolean allNumerical = !Utils.isEmpty(values)
258 && values.stream().allMatch(TagChecker::isNum);
259 if (allNumerical) {
260 ignoreForLevenshtein.add(key);
261 }
262 }
263 }
264
265 /**
266 * Reads the spell-check file into a HashMap.
267 * The data file is a list of words, beginning with +/-. If it starts with +,
268 * the word is valid, but if it starts with -, the word should be replaced
269 * by the nearest + word before this.
270 *
271 * @throws IOException if any I/O error occurs
272 */
273 private static void initializeData() throws IOException {
274 ignoreDataStartsWith.clear();
275 ignoreDataEquals.clear();
276 ignoreDataEndsWith.clear();
277 ignoreDataTag.clear();
278 harmonizedKeys.clear();
279 ignoreForLevenshtein.clear();
280 oftenUsedTags.clear();
281 presetIndex.clear();
282 ignoreForOuterMPSameTagCheck.clear();
283
284 StringBuilder errorSources = new StringBuilder();
285 for (String source : Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES)) {
286 try (
287 CachedFile cf = new CachedFile(source);
288 BufferedReader reader = cf.getContentReader()
289 ) {
290 String okValue = null;
291 boolean tagcheckerfile = false;
292 boolean ignorefile = false;
293 boolean isFirstLine = true;
294 String line;
295 while ((line = reader.readLine()) != null) {
296 if (line.isEmpty()) {
297 // ignore
298 } else if (line.startsWith("#")) {
299 if (line.startsWith("# JOSM TagChecker")) {
300 tagcheckerfile = true;
301 Logging.error(tr("Ignoring {0}. Support was dropped", source));
302 } else
303 if (line.startsWith("# JOSM IgnoreTags")) {
304 ignorefile = true;
305 if (!DEFAULT_SOURCES.contains(source)) {
306 Logging.info(tr("Adding {0} to ignore tags", source));
307 }
308 }
309 } else if (ignorefile) {
310 parseIgnoreFileLine(source, line);
311 } else if (tagcheckerfile) {
312 // ignore
313 } else if (line.charAt(0) == '+') {
314 okValue = line.substring(1);
315 } else if (line.charAt(0) == '-' && okValue != null) {
316 String hk = harmonizeKey(line.substring(1));
317 if (!okValue.equals(hk) && harmonizedKeys.put(hk, okValue) != null && Logging.isDebugEnabled()) {
318 Logging.debug("Line was ignored: " + line);
319 }
320 } else {
321 Logging.error(tr("Invalid spellcheck line: {0}", line));
322 }
323 if (isFirstLine) {
324 isFirstLine = false;
325 if (!(tagcheckerfile || ignorefile) && !DEFAULT_SOURCES.contains(source)) {
326 Logging.info(tr("Adding {0} to spellchecker", source));
327 }
328 }
329 }
330 } catch (IOException e) {
331 Logging.error(e);
332 errorSources.append(source).append('\n');
333 }
334 }
335
336 if (errorSources.length() > 0)
337 throw new IOException(trn(
338 "Could not access data file:\n{0}",
339 "Could not access data files:\n{0}", errorSources.length(), errorSources));
340 }
341
342 /**
343 * Parse a line found in a configuration file
344 * @param source name of configuration file
345 * @param line the line to parse
346 */
347 private static void parseIgnoreFileLine(String source, String line) {
348 line = line.trim();
349 if (line.length() < 4) {
350 return;
351 }
352 try {
353 String key = line.substring(0, 2);
354 line = line.substring(2);
355
356 switch (key) {
357 case "S:":
358 ignoreDataStartsWith.add(line);
359 break;
360 case "E:":
361 ignoreDataEquals.add(line);
362 addToKeyDictionary(line);
363 break;
364 case "F:":
365 ignoreDataEndsWith.add(line);
366 break;
367 case "K:":
368 Tag tag = Tag.ofString(line);
369 ignoreDataTag.add(tag);
370 oftenUsedTags.put(tag.getKey(), tag.getValue());
371 addToKeyDictionary(tag.getKey());
372 break;
373 default:
374 if (!key.startsWith(";")) {
375 Logging.warn("Unsupported TagChecker key: " + key);
376 }
377 }
378 } catch (IllegalArgumentException e) {
379 Logging.error("Invalid line in {0} : {1}", source, e.getMessage());
380 Logging.trace(e);
381 }
382 }
383
384 private static void addToKeyDictionary(String key) {
385 if (key != null) {
386 String hk = harmonizeKey(key);
387 if (!key.equals(hk)) {
388 harmonizedKeys.put(hk, key);
389 }
390 }
391 }
392
393 /**
394 * Reads the presets data.
395 *
396 */
397 public static void initializePresets() {
398
399 if (!Config.getPref().getBoolean(PREF_CHECK_VALUES, true))
400 return;
401
402 Collection<TaggingPreset> presets = TaggingPresets.getTaggingPresets();
403 if (!presets.isEmpty()) {
404 initAdditionalPresetsValueData();
405 for (TaggingPreset p : presets) {
406 List<TaggingPresetItem> minData = new ArrayList<>();
407 for (TaggingPresetItem i : p.data) {
408 if (i instanceof KeyedItem) {
409 if (!"none".equals(((KeyedItem) i).match))
410 minData.add(i);
411 addPresetValue((KeyedItem) i);
412 } else if (i instanceof CheckGroup) {
413 for (Check c : ((CheckGroup) i).checks) {
414 addPresetValue(c);
415 }
416 }
417 }
418 if (!minData.isEmpty()) {
419 presetIndex .put(p, minData);
420 }
421 }
422 }
423 }
424
425 private static void initAdditionalPresetsValueData() {
426 additionalPresetsValueData = new HashSet<>();
427 additionalPresetsValueData.addAll(AbstractPrimitive.getUninterestingKeys());
428 additionalPresetsValueData.addAll(Config.getPref().getList(
429 ValidatorPrefHelper.PREFIX + ".knownkeys",
430 Arrays.asList("is_in", "int_ref", "fixme", "population")));
431 }
432
433 private static void addPresetValue(KeyedItem ky) {
434 if (ky.key != null && ky.getValues() != null) {
435 addToKeyDictionary(ky.key);
436 }
437 }
438
439 /**
440 * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters)
441 * @param s string to check
442 * @return {@code true} if {@code s} contains non-printing control characters
443 */
444 static boolean containsUnwantedNonPrintingControlCharacter(String s) {
445 return !Utils.isEmpty(s) && (
446 isJoiningChar(s.charAt(0)) ||
447 isJoiningChar(s.charAt(s.length() - 1)) ||
448 s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c))
449 );
450 }
451
452 private static boolean isAsciiControlChar(int c) {
453 return c < 0x20 || c == 0x7F;
454 }
455
456 private static boolean isNewLineChar(int c) {
457 return c == 0x0a || c == 0x0d;
458 }
459
460 private static boolean isJoiningChar(int c) {
461 return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ
462 }
463
464 private static boolean isBidiControlChar(int c) {
465 /* check for range 0x200e to 0x200f (LRM, RLM) or
466 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
467 return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e);
468 }
469
470 static String removeUnwantedNonPrintingControlCharacters(String s) {
471 // Remove all unwanted characters
472 String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
473 // Remove joining characters located at the beginning of the string
474 while (!result.isEmpty() && isJoiningChar(result.charAt(0))) {
475 result = result.substring(1);
476 }
477 // Remove joining characters located at the end of the string
478 while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) {
479 result = result.substring(0, result.length() - 1);
480 }
481 return result;
482 }
483
484 static boolean containsUnusualUnicodeCharacter(String key, String value) {
485 return getUnusualUnicodeCharacter(key, value).isPresent();
486 }
487
488 static OptionalInt getUnusualUnicodeCharacter(String key, String value) {
489 return value == null
490 ? OptionalInt.empty()
491 : value.chars().filter(c -> isUnusualUnicodeBlock(key, c)).findFirst();
492 }
493
494 /**
495 * Detects highly suspicious Unicode characters that have been seen in OSM database.
496 * @param key tag key
497 * @param c current character code point
498 * @return {@code true} if the current unicode block is very unusual for the given key
499 */
500 private static boolean isUnusualUnicodeBlock(String key, int c) {
501 UnicodeBlock b = UnicodeBlock.of(c);
502 return isUnusualPhoneticUse(key, b, c) || isUnusualBmpUse(b) || isUnusualSmpUse(b);
503 }
504
505 private static boolean isAllowedPhoneticCharacter(String key, int c) {
506 // CHECKSTYLE.OFF: BooleanExpressionComplexity
507 return c == 0x0259 || c == 0x018F // U+0259 is paired with the capital letter U+018F in Azeri, see #18740
508 || c == 0x0254 || c == 0x0186 // U+0254 is paired with the capital letter U+0186 in several African languages, see #18740
509 || c == 0x0257 || c == 0x018A // "ɗ/Ɗ" (U+0257/U+018A), see #19760
510 || c == 0x025B || c == 0x0190 // U+025B is paired with the capital letter U+0190 in several African languages, see #18740
511 || c == 0x0263 || c == 0x0194 // "ɣ/Ɣ" (U+0263/U+0194), see #18740
512 || c == 0x0268 || c == 0x0197 // "ɨ/Ɨ" (U+0268/U+0197), see #18740
513 || c == 0x0269 || c == 0x0196 // "ɩ/Ɩ" (U+0269/U+0196), see #20437
514 || c == 0x0272 || c == 0x019D // "ɲ/Ɲ" (U+0272/U+019D), see #18740
515 || c == 0x0273 || c == 0x019E // "ŋ/Ŋ" (U+0273/U+019E), see #18740
516 || c == 0x0142 || c == 0x0294 // see #20754
517 || c == 0x1DBB || c == 0x02B7 // "ᶻ/ʷ" (U+1DBB/U+02B7), see #23138. Both characters are used in Lushootseed paired with a letter
518 || (key.endsWith("ref") && 0x1D2C <= c && c <= 0x1D42); // allow uppercase superscript latin characters in *ref tags
519 }
520
521 private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b, int c) {
522 return !isAllowedPhoneticCharacter(key, c)
523 && (b == UnicodeBlock.IPA_EXTENSIONS // U+0250..U+02AF
524 || b == UnicodeBlock.PHONETIC_EXTENSIONS // U+1D00..U+1D7F
525 || b == UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT) // U+1D80..U+1DBF
526 && !key.endsWith(":pronunciation");
527 }
528
529 private static boolean isUnusualBmpUse(UnicodeBlock b) {
530 return b == UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS // U+20D0..U+20FF
531 || b == UnicodeBlock.MATHEMATICAL_OPERATORS // U+2200..U+22FF
532 || b == UnicodeBlock.ENCLOSED_ALPHANUMERICS // U+2460..U+24FF
533 || b == UnicodeBlock.BOX_DRAWING // U+2500..U+257F
534 || b == UnicodeBlock.GEOMETRIC_SHAPES // U+25A0..U+25FF
535 || b == UnicodeBlock.DINGBATS // U+2700..U+27BF
536 || b == UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS // U+2B00..U+2BFF
537 || b == UnicodeBlock.GLAGOLITIC // U+2C00..U+2C5F
538 || b == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO // U+3130..U+318F
539 || b == UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS // U+3200..U+32FF
540 || b == UnicodeBlock.LATIN_EXTENDED_D // U+A720..U+A7FF
541 || b == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS // U+F900..U+FAFF
542 || b == UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS // U+FB00..U+FB4F
543 || b == UnicodeBlock.VARIATION_SELECTORS // U+FE00..U+FE0F
544 || b == UnicodeBlock.SPECIALS; // U+FFF0..U+FFFF
545 // CHECKSTYLE.ON: BooleanExpressionComplexity
546 }
547
548 private static boolean isUnusualSmpUse(UnicodeBlock b) {
549 // UnicodeBlock.SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS is only defined in Java 9+
550 return b == UnicodeBlock.MUSICAL_SYMBOLS // U+1D100..U+1D1FF
551 || b == UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT // U+1F100..U+1F1FF
552 || b == UnicodeBlock.EMOTICONS // U+1F600..U+1F64F
553 || b == UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS; // U+1F680..U+1F6FF
554 }
555
556 /**
557 * Get set of preset values for the given key.
558 * @param key the key
559 * @return null if key is not in presets or in additionalPresetsValueData,
560 * else a set which might be empty.
561 */
562 private static Set<String> getPresetValues(String key) {
563 if (TaggingPresets.isKeyInPresets(key)) {
564 return TaggingPresets.getPresetValues(key);
565 }
566 if (additionalPresetsValueData.contains(key))
567 return Collections.emptySet();
568 // null means key is not known
569 return null;
570 }
571
572 /**
573 * Determines if the given key is in internal presets.
574 * @param key key
575 * @return {@code true} if the given key is in internal presets
576 * @since 9023
577 * @deprecated since 18281 -- use {@link TaggingPresets#isKeyInPresets(String)} instead
578 */
579 @Deprecated
580 public static boolean isKeyInPresets(String key) {
581 return TaggingPresets.isKeyInPresets(key);
582 }
583
584 /**
585 * Determines if the given tag is in internal presets.
586 * @param key key
587 * @param value value
588 * @return {@code true} if the given tag is in internal presets
589 * @since 9023
590 */
591 public static boolean isTagInPresets(String key, String value) {
592 final Set<String> values = getPresetValues(key);
593 return values != null && values.contains(value);
594 }
595
596 /**
597 * Returns the list of ignored tags.
598 * @return the list of ignored tags
599 * @since 9023
600 */
601 public static List<Tag> getIgnoredTags() {
602 return new ArrayList<>(ignoreDataTag);
603 }
604
605 /**
606 * Determines if the given tag key is ignored for checks "key/tag not in presets".
607 * @param key key
608 * @return true if the given key is ignored
609 */
610 private static boolean isKeyIgnored(String key) {
611 return ignoreDataEquals.contains(key)
612 || ignoreDataStartsWith.stream().anyMatch(key::startsWith)
613 || ignoreDataEndsWith.stream().anyMatch(key::endsWith);
614 }
615
616 /**
617 * Determines if the given tag is ignored for checks "key/tag not in presets".
618 * @param key key
619 * @param value value
620 * @return {@code true} if the given tag is ignored
621 * @since 9023
622 */
623 public static boolean isTagIgnored(String key, String value) {
624 if (isKeyIgnored(key))
625 return true;
626 final Set<String> values = getPresetValues(key);
627 if (values != null && values.isEmpty())
628 return true;
629 if (!isTagInPresets(key, value)) {
630 return ignoreDataTag.stream()
631 .anyMatch(a -> key.equals(a.getKey()) && value.equals(a.getValue()));
632 }
633 return false;
634 }
635
636 /**
637 * Checks the primitive tags
638 * @param p The primitive to check
639 */
640 @Override
641 public void check(OsmPrimitive p) {
642 if (!p.isTagged())
643 return;
644
645 // Just a collection to know if a primitive has been already marked with error
646 MultiMap<OsmPrimitive, String> withErrors = new MultiMap<>();
647
648 for (Entry<String, String> prop : p.getKeys().entrySet()) {
649 String s = marktr("Tag ''{0}'' invalid.");
650 String key = prop.getKey();
651 String value = prop.getValue();
652
653 if (checkKeys) {
654 checkSingleTagKeySimple(withErrors, p, s, key);
655 }
656 if (checkValues) {
657 checkSingleTagValueSimple(withErrors, p, s, key, value);
658 checkSingleTagComplex(withErrors, p, key, value);
659 }
660 if (checkFixmes && key != null && !Utils.isEmpty(value) && isFixme(key, value) && !withErrors.contains(p, "FIXME")) {
661 errors.add(TestError.builder(this, Severity.OTHER, FIXME)
662 .message(tr("fixme"))
663 .primitives(p)
664 .build());
665 withErrors.put(p, "FIXME");
666 }
667 }
668
669 if (p instanceof Relation && p.hasTag("type", "multipolygon")) {
670 checkMultipolygonTags(p);
671 }
672
673 final Collection<TaggingPreset> matchingPresets;
674 TagMap tags;
675 if (checkPresetsTypes || checkRegions) {
676 tags = p.getKeys();
677 matchingPresets = presetIndex.entrySet().stream()
678 .filter(e -> TaggingPresetItem.matches(e.getValue(), tags))
679 .map(Entry::getKey)
680 .collect(Collectors.toCollection(LinkedHashSet::new));
681 } else {
682 matchingPresets = null;
683 tags = null;
684 }
685
686 if (checkPresetsTypes) {
687 checkPresetsTypes(p, matchingPresets, tags);
688 }
689
690 if (checkRegions) {
691 checkRegions(p, matchingPresets);
692 }
693 }
694
695 /**
696 * Check that the primitive matches the preset types for the preset
697 * @param p The primitive to check
698 * @param matchingPresets The presets to go through
699 * @param tags Tags from the primitive to check
700 */
701 private void checkPresetsTypes(OsmPrimitive p, Collection<TaggingPreset> matchingPresets, Map<String, String> tags) {
702 TaggingPresetType presetType = TaggingPresetType.forPrimitive(p);
703 EnumSet<TaggingPresetType> presetTypes = EnumSet.of(presetType);
704
705 Collection<TaggingPreset> matchingPresetsOK = matchingPresets.stream().filter(
706 tp -> tp.typeMatches(presetTypes)).collect(Collectors.toList());
707 Collection<TaggingPreset> matchingPresetsKO = matchingPresets.stream().filter(
708 tp -> !tp.typeMatches(presetTypes)).collect(Collectors.toList());
709
710 for (TaggingPreset tp : matchingPresetsKO) {
711 // Potential error, unless matching tags are all known by a supported preset
712 Map<String, String> matchingTags = tp.data.stream()
713 .filter(i -> Boolean.TRUE.equals(i.matches(tags)))
714 .filter(i -> i instanceof KeyedItem).map(i -> ((KeyedItem) i).key)
715 .collect(Collectors.toMap(k -> k, tags::get));
716 if (matchingPresetsOK.stream().noneMatch(
717 tp2 -> matchingTags.entrySet().stream().allMatch(
718 e -> tp2.data.stream().anyMatch(
719 i -> i instanceof KeyedItem && ((KeyedItem) i).key.equals(e.getKey()))))) {
720 errors.add(TestError.builder(this, Severity.OTHER, INVALID_PRESETS_TYPE)
721 .message(tr("Object type not in preset"),
722 marktr("Object type {0} is not supported by tagging preset: {1}"),
723 tr(presetType.getName()), tp.getLocaleName())
724 .primitives(p)
725 .build());
726 }
727 }
728 }
729
730 /**
731 * Check that the preset is valid for the region the primitive is in
732 * @param p The primitive to check
733 * @param matchingPresets The presets to check against
734 */
735 private void checkRegions(OsmPrimitive p, Collection<TaggingPreset> matchingPresets) {
736 LatLon center;
737 if (p instanceof Node) {
738 center = ((Node) p).getCoor();
739 } else {
740 center = p.getBBox().getCenter();
741 }
742 for (TaggingPreset preset : matchingPresets) {
743 if (preset.regions() != null) {
744 boolean isInRegion = false; //true if the object is in an applicable region
745 for (String region : preset.regions()) {
746 if (Territories.isIso3166Code(region, center)) { //check if center of the object is in a region
747 isInRegion = true;
748 }
749 }
750 if (isInRegion == preset.exclude_regions()) {
751 errors.add(TestError.builder(this, Severity.WARNING, INVALID_REGION)
752 .message(tr("Preset is invalid in this region"),
753 marktr("Preset {0} should not be applied in this region"),
754 preset.getLocaleName())
755 .primitives(p)
756 .build());
757 }
758 }
759 // Check the tags
760 tagCheck(preset, p, center, preset.data);
761 }
762 }
763
764 /**
765 * Perform the checks against a given preset value
766 * @param preset The originating preset (used for error creation)
767 * @param p The originating primitive (used for error creation)
768 * @param center The center of the primitive or other location of the primitive to check
769 * @param tagInformation The sub items for the preset
770 */
771 private void tagCheck(TaggingPreset preset, OsmPrimitive p, LatLon center, List<? extends TaggingPresetItem> tagInformation) {
772 for (TaggingPresetItem item : tagInformation) {
773 if (item instanceof CheckGroup) {
774 tagCheckReal(preset, p, center, ((CheckGroup) item).checks);
775 } else if (item instanceof ComboMultiSelect) {
776 tagCheckReal(preset, p, center, ((ComboMultiSelect) item).presetListEntries());
777 }
778 if (item instanceof RegionSpecific && ((RegionSpecific) item).regions() != null) {
779 tagCheckReal(preset, p, center, (RegionSpecific) item);
780 }
781 }
782 }
783
784 /**
785 * Perform the checks against a given preset value
786 * @param preset The originating preset (used for error creation)
787 * @param p The originating primitive (used for error creation)
788 * @param center The center of the primitive or other location of the primitive to check
789 * @param data The data for the region specific information
790 */
791 private void tagCheckReal(TaggingPreset preset, OsmPrimitive p, LatLon center, List<? extends RegionSpecific> data) {
792 for (RegionSpecific regionSpecific : data) {
793 if (regionSpecific.regions() != null) {
794 tagCheckReal(preset, p, center, regionSpecific);
795 }
796 }
797 }
798
799 /**
800 * Perform the checks against a given preset value
801 * @param preset The originating preset (used for error creation)
802 * @param p The originating primitive (used for error creation)
803 * @param center The center of the primitive or other location of the primitive to check
804 * @param data The data for the region specific information
805 */
806 private void tagCheckReal(TaggingPreset preset, OsmPrimitive p, LatLon center, RegionSpecific data) {
807 // First, check if we aren't in the region for the tag
808 if (latLonInRegions(center, data.regions()) == data.exclude_regions()) {
809 final String key;
810 final String value;
811 if (data instanceof PresetListEntry) {
812 key = ((PresetListEntry) data).cms.key;
813 value = ((PresetListEntry) data).value;
814 } else if (data instanceof KeyedItem) {
815 key = ((KeyedItem) data).key;
816 if (data instanceof Key) {
817 value = ((Key) data).value;
818 } else {
819 value = null;
820 }
821 } else {
822 throw new JosmRuntimeException("Unknown implementor for RegionSpecific");
823 }
824 if (p.hasTag(key) && (value == null || value.equals(p.get(key)))) {
825 final TestError.Builder builder = TestError.builder(this, Severity.WARNING, INVALID_REGION)
826 .primitives(p);
827 if (value == null) {
828 builder.message(tr("Key from a preset is invalid in this region"),
829 marktr("Preset {0} should not have the key {1}"),
830 preset.getLocaleName(), key);
831 } else {
832 builder.message(tr("Value from a preset is invalid in this region"),
833 marktr("Preset {0} should not have the tag {1}={2}"),
834 preset.getLocaleName(), key, value);
835 }
836 errors.add(builder.build());
837 }
838 }
839 }
840
841 /**
842 * Check if the specified latlon is inside any of the specified regions
843 * @param latLon The {@link LatLon} to check
844 * @param regions The regions to see if the {@link LatLon} is in
845 * @return {@code true} if the coordinate is inside any of the regions
846 */
847 private static boolean latLonInRegions(LatLon latLon, Collection<String> regions) {
848 if (regions != null) {
849 for (String region : regions) {
850 if (Territories.isIso3166Code(region, latLon)) {
851 return true;
852 }
853 }
854 }
855 return false;
856 }
857
858 private static final Collection<String> NO_AREA_KEYS = Arrays.asList("name", "area", "ref", "access", "operator");
859
860 private void checkMultipolygonTags(OsmPrimitive p) {
861 if (p.isAnnotated() || p.keys()
862 .anyMatch(k -> k.matches("^(abandoned|construction|demolished|disused|planned|razed|removed|was).*")))
863 return;
864
865 checkOuterWaysOfRelation((Relation) p);
866
867 if (hasAcceptedPrimaryTagForMultipolygon(p))
868 return;
869 TestError.Builder builder = null;
870 if (p.hasKey("surface")) {
871 // accept often used tag surface=* as area tag
872 builder = TestError.builder(this, Severity.OTHER, MULTIPOLYGON_INCOMPLETE)
873 .message(tr("Multipolygon tags"), marktr("only {0} tag"), "surface");
874 } else {
875 Map<String, String> filteredTags = p.getInterestingTags();
876 filteredTags.remove("type");
877 NO_AREA_KEYS.forEach(filteredTags::remove);
878 filteredTags.keySet().removeIf(key -> !key.matches("[a-z0-9:_]+"));
879
880 if (filteredTags.isEmpty()) {
881 builder = TestError.builder(this, Severity.ERROR, MULTIPOLYGON_NO_AREA)
882 .message(tr("Multipolygon tags"), marktr("tag describing the area is missing"), new Object());
883
884 }
885 }
886 if (builder == null) {
887 // multipolygon has either no area tag or a rarely used one
888 builder = TestError.builder(this, Severity.WARNING, MULTIPOLYGON_MAYBE_NO_AREA)
889 .message(tr("Multipolygon tags"), marktr("tag describing the area might be missing"), new Object());
890 }
891 errors.add(builder.primitives(p).build());
892 }
893
894 /**
895 * Check if an outer way of the relation has the same tag as the relation.
896 * @param rel the relation
897 */
898 private void checkOuterWaysOfRelation(Relation rel) {
899 for (Entry<String, String> tag : rel.getInterestingTags().entrySet()) {
900 if (ignoreForOuterMPSameTagCheck.contains(tag.getKey()))
901 continue;
902
903 Set<Way> sameOuters = rel.getMembers().stream()
904 .filter(rm -> rm.isWay() && rm.getWay().isArea() && "outer".equals(rm.getRole())
905 && tag.getValue().equals(rm.getWay().get(tag.getKey())))
906 .map(RelationMember::getWay).collect(Collectors.toSet());
907 if (!sameOuters.isEmpty()) {
908 List<OsmPrimitive> primitives = new ArrayList<>(sameOuters.size() + 1);
909 primitives.add(rel);
910 primitives.addAll(sameOuters);
911 Way w = new Way();
912 w.put(tag.getKey(), tag.getValue());
913 if (hasAcceptedPrimaryTagForMultipolygon(w)) {
914 errors.add(TestError.builder(this, Severity.WARNING, MULTIPOLYGON_SAME_TAG_ON_OUTER)
915 .message(tr("Multipolygon outer way repeats major tag of relation"),
916 marktr("Same tag:''{0}''=''{1}''"), tag.getKey(), tag.getValue())
917 .primitives(primitives)
918 .build());
919 } else {
920 errors.add(TestError.builder(this, Severity.OTHER, MULTIPOLYGON_SAME_TAG_ON_OUTER)
921 .message(tr("Multipolygon outer way repeats tag of relation"),
922 marktr("Same tag:''{0}''=''{1}''"), tag.getKey(), tag.getValue())
923 .primitives(primitives)
924 .build());
925 }
926 }
927 }
928 }
929
930 /**
931 * Check if a multipolygon has a main tag that describes the type of area. Accepts also some deprecated tags and typos.
932 * @param p the multipolygon
933 * @return true if the multipolygon has a main tag that (likely) describes the type of area.
934 */
935 private static boolean hasAcceptedPrimaryTagForMultipolygon(OsmPrimitive p) {
936 if (p.hasKey("landuse", "amenity", "building", "building:part", "area:highway", "shop", "place", "boundary",
937 "landform", "piste:type", "sport", "golf", "landcover", "aeroway", "office", "healthcare", "craft", "room")
938 || p.hasTagDifferent("natural", "tree", "peak", "saddle", "tree_row")
939 || p.hasTagDifferent("man_made", "survey_point", "mast", "flagpole", "manhole", "watertap")
940 || p.hasTagDifferent("highway", "crossing", "bus_stop", "turning_circle", "street_lamp",
941 "traffic_signals", "stop", "milestone", "mini_roundabout", "motorway_junction", "passing_place",
942 "speed_camera", "traffic_mirror", "trailhead", "turning_circle", "turning_loop", "toll_gantry")
943 || p.hasTagDifferent("tourism", "attraction", "artwork")
944 || p.hasTagDifferent("leisure", "picnic_table", "slipway", "firepit")
945 || p.hasTagDifferent("historic", "wayside_cross", "milestone"))
946 return true;
947 if (p.hasTag("barrier", "hedge", "retaining_wall")
948 || p.hasTag("public_transport", "platform", "station")
949 || p.hasTag("railway", "platform")
950 || p.hasTag("waterway", "riverbank", "dam", "rapids", "dock", "boatyard", "fuel")
951 || p.hasTag("indoor", "corridor", "room", "area")
952 || p.hasTag("power", "substation", "generator", "plant", "switchgear", "converter", "sub_station")
953 || p.hasTag("seamark:type", "harbour", "fairway", "anchorage", "landmark", "berth", "harbour_basin",
954 "separation_zone")
955 || (p.get("seamark:type") != null && p.get("seamark:type").matches(".*\\_(area|zone)$")))
956 return true;
957 return p.hasTag("harbour", OsmUtils.TRUE_VALUE)
958 || p.hasTag("flood_prone", OsmUtils.TRUE_VALUE)
959 || p.hasTag("bridge", OsmUtils.TRUE_VALUE)
960 || p.hasTag("ruins", OsmUtils.TRUE_VALUE)
961 || p.hasTag("junction", OsmUtils.TRUE_VALUE);
962 }
963
964 private void checkSingleTagValueSimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key, String value) {
965 if (!checkValues || value == null)
966 return;
967 if (containsUnwantedNonPrintingControlCharacter(value) && !withErrors.contains(p, "ICV")) {
968 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE)
969 .message(tr("Tag value contains non-printing (usually invisible) character"), s, key)
970 .primitives(p)
971 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value)))
972 .build());
973 withErrors.put(p, "ICV");
974 }
975 final OptionalInt unusualUnicodeCharacter = getUnusualUnicodeCharacter(key, value);
976 if (unusualUnicodeCharacter.isPresent() && !withErrors.contains(p, "UUCV")) {
977 final String codepoint = String.format(Locale.ROOT, "U+%04X", unusualUnicodeCharacter.getAsInt());
978 errors.add(TestError.builder(this, Severity.WARNING, UNUSUAL_UNICODE_CHAR_VALUE)
979 .message(tr("Tag value contains unusual Unicode character {0}", codepoint), s, key)
980 .primitives(p)
981 .build());
982 withErrors.put(p, "UUCV");
983 }
984 if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) {
985 errors.add(TestError.builder(this, Severity.ERROR, LONG_VALUE)
986 .message(tr("Tag value longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, value.length()), s, key)
987 .primitives(p)
988 .build());
989 withErrors.put(p, "LV");
990 }
991 if (value.trim().isEmpty() && !withErrors.contains(p, "EV")) {
992 errors.add(TestError.builder(this, Severity.WARNING, EMPTY_VALUES)
993 .message(tr("Tags with empty values"), s, key)
994 .primitives(p)
995 .build());
996 withErrors.put(p, "EV");
997 }
998 final String errTypeSpace = "SPACE";
999 if ((value.startsWith(" ") || value.endsWith(" ")) && !withErrors.contains(p, errTypeSpace)) {
1000 errors.add(TestError.builder(this, Severity.WARNING, INVALID_SPACE)
1001 .message(tr("Property values start or end with white space"), s, key)
1002 .primitives(p)
1003 .build());
1004 withErrors.put(p, errTypeSpace);
1005 }
1006 if (value.contains(" ") && !withErrors.contains(p, errTypeSpace)) {
1007 errors.add(TestError.builder(this, Severity.WARNING, MULTIPLE_SPACES)
1008 .message(tr("Property values contain multiple white spaces"), s, key)
1009 .primitives(p)
1010 .build());
1011 withErrors.put(p, errTypeSpace);
1012 }
1013 if (includeOtherSeverity && !value.equals(Entities.unescape(value)) && !withErrors.contains(p, "HTML")) {
1014 errors.add(TestError.builder(this, Severity.OTHER, INVALID_HTML)
1015 .message(tr("Property values contain HTML entity"), s, key)
1016 .primitives(p)
1017 .build());
1018 withErrors.put(p, "HTML");
1019 }
1020 }
1021
1022 private void checkSingleTagKeySimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key) {
1023 if (!checkKeys || key == null)
1024 return;
1025 if (containsUnwantedNonPrintingControlCharacter(key) && !withErrors.contains(p, "ICK")) {
1026 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY)
1027 .message(tr("Tag key contains non-printing character"), s, key)
1028 .primitives(p)
1029 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(key)))
1030 .build());
1031 withErrors.put(p, "ICK");
1032 }
1033 if (key.length() > Tagged.MAX_TAG_LENGTH && !withErrors.contains(p, "LK")) {
1034 errors.add(TestError.builder(this, Severity.ERROR, LONG_KEY)
1035 .message(tr("Tag key longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, key.length()), s, key)
1036 .primitives(p)
1037 .build());
1038 withErrors.put(p, "LK");
1039 }
1040 if (key.indexOf(' ') >= 0 && !withErrors.contains(p, "IPK")) {
1041 errors.add(TestError.builder(this, Severity.WARNING, INVALID_KEY_SPACE)
1042 .message(tr("Invalid white space in property key"), s, key)
1043 .primitives(p)
1044 .build());
1045 withErrors.put(p, "IPK");
1046 }
1047 }
1048
1049 private void checkSingleTagComplex(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key, String value) {
1050 if (!checkValues || key == null || Utils.isEmpty(value))
1051 return;
1052 if (additionalPresetsValueData != null && !isTagIgnored(key, value)) {
1053 if (!TaggingPresets.isKeyInPresets(key)) {
1054 spellCheckKey(withErrors, p, key);
1055 } else if (!isTagInPresets(key, value)) {
1056 if (oftenUsedTags.contains(key, value)) {
1057 // tag is quite often used but not in presets
1058 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
1059 .message(tr("Presets do not contain property value"),
1060 marktr("Value ''{0}'' for key ''{1}'' not in presets, but is known."), value, key)
1061 .primitives(p)
1062 .build());
1063 withErrors.put(p, "UPV");
1064 } else {
1065 tryGuess(p, key, value, withErrors);
1066 }
1067 }
1068 }
1069 }
1070
1071 private void spellCheckKey(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key) {
1072 String prettifiedKey = harmonizeKey(key);
1073 String fixedKey;
1074 if (ignoreDataEquals.contains(prettifiedKey)) {
1075 fixedKey = prettifiedKey;
1076 } else {
1077 fixedKey = TaggingPresets.isKeyInPresets(prettifiedKey) ? prettifiedKey : harmonizedKeys.get(prettifiedKey);
1078 }
1079 if (fixedKey == null && ignoreDataTag.stream().anyMatch(a -> a.getKey().equals(prettifiedKey))) {
1080 fixedKey = prettifiedKey;
1081 }
1082
1083 if (!Utils.isEmpty(fixedKey) && !Objects.equals(fixedKey, key)) {
1084 final String proposedKey = fixedKey;
1085 // misspelled preset key
1086 final TestError.Builder error = TestError.builder(this, Severity.WARNING, MISSPELLED_KEY)
1087 .message(tr("Misspelled property key"), marktr("Key ''{0}'' looks like ''{1}''."), key, proposedKey)
1088 .primitives(p);
1089 if (p.hasKey(fixedKey)) {
1090 errors.add(error.build());
1091 } else {
1092 errors.add(error.fix(() -> new ChangePropertyKeyCommand(p, key, proposedKey)).build());
1093 }
1094 withErrors.put(p, "WPK");
1095 } else if (includeOtherSeverity) {
1096 errors.add(TestError.builder(this, Severity.OTHER, INVALID_KEY)
1097 .message(tr("Presets do not contain property key"), marktr("Key ''{0}'' not in presets."), key)
1098 .primitives(p)
1099 .build());
1100 withErrors.put(p, "UPK");
1101 }
1102 }
1103
1104 private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) {
1105 // try to fix common typos and check again if value is still unknown
1106 final String harmonizedValue = harmonizeValue(value);
1107 if (Utils.isEmpty(harmonizedValue))
1108 return;
1109 String fixedValue;
1110 List<Set<String>> sets = new ArrayList<>();
1111 Set<String> presetValues = getPresetValues(key);
1112 if (presetValues != null)
1113 sets.add(presetValues);
1114 Set<String> usedValues = oftenUsedTags.get(key);
1115 if (usedValues != null)
1116 sets.add(usedValues);
1117 fixedValue = sets.stream().anyMatch(possibleValues -> possibleValues.contains(harmonizedValue))
1118 ? harmonizedValue : null;
1119 if (fixedValue == null && !ignoreForLevenshtein.contains(key)) {
1120 int maxPresetValueLen = 0;
1121 List<String> fixVals = new ArrayList<>();
1122 // use Levenshtein distance to find typical typos
1123 int minDist = MAX_LEVENSHTEIN_DISTANCE + 1;
1124 for (Set<String> possibleValues: sets) {
1125 for (String possibleVal : possibleValues) {
1126 if (possibleVal.isEmpty())
1127 continue;
1128 maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length());
1129 if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) {
1130 // don't suggest fix value when given value is short and lengths are too different
1131 // for example surface=u would result in surface=mud
1132 continue;
1133 }
1134 int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue);
1135 if (dist >= harmonizedValue.length()) {
1136 // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'.
1137 continue;
1138 }
1139 if (dist < minDist) {
1140 minDist = dist;
1141 fixVals.clear();
1142 fixVals.add(possibleVal);
1143 } else if (dist == minDist) {
1144 fixVals.add(possibleVal);
1145 }
1146 }
1147 }
1148 if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE
1149 && !fixVals.isEmpty()
1150 && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) {
1151 filterDeprecatedTags(p, key, fixVals);
1152 if (!fixVals.isEmpty()) {
1153 if (fixVals.size() < 2) {
1154 fixedValue = fixVals.get(0);
1155 } else {
1156 Collections.sort(fixVals);
1157 // misspelled preset value with multiple good alternatives
1158 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX)
1159 .message(tr("Unknown property value"),
1160 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"),
1161 value, key, fixVals)
1162 .primitives(p).build());
1163 withErrors.put(p, "WPV");
1164 return;
1165 }
1166 }
1167 }
1168 }
1169 if (fixedValue != null && !fixedValue.equals(value)) {
1170 final String newValue = fixedValue;
1171 // misspelled preset value
1172 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE)
1173 .message(tr("Unknown property value"),
1174 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue)
1175 .primitives(p)
1176 .build());
1177 withErrors.put(p, "WPV");
1178 } else if (includeOtherSeverity) {
1179 // unknown preset value
1180 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
1181 .message(tr("Presets do not contain property value"),
1182 marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key)
1183 .primitives(p)
1184 .build());
1185 withErrors.put(p, "UPV");
1186 }
1187 }
1188
1189 // see #19180
1190 private void filterDeprecatedTags(OsmPrimitive p, String key, List<String> fixVals) {
1191 if (fixVals.isEmpty() || deprecatedChecker == null)
1192 return;
1193
1194 int unchangedDeprecated = countDeprecated(p);
1195
1196 // see #19895: create deep clone. This complex method works even with locked files
1197 MergeSourceBuildingVisitor builder = new MergeSourceBuildingVisitor(p.getDataSet());
1198 p.accept(builder);
1199 DataSet clonedDs = builder.build();
1200 OsmPrimitive clone = clonedDs.getPrimitiveById(p.getPrimitiveId());
1201
1202 Iterator<String> iter = fixVals.iterator();
1203 while (iter.hasNext()) {
1204 clone.put(key, iter.next());
1205 if (countDeprecated(clone) > unchangedDeprecated)
1206 iter.remove();
1207 }
1208 }
1209
1210 private int countDeprecated(OsmPrimitive p) {
1211 if (deprecatedChecker == null)
1212 return 0;
1213 deprecatedChecker.getErrors().clear();
1214 deprecatedChecker.visit(Collections.singleton(p), url -> url.endsWith("deprecated.mapcss"));
1215 return deprecatedChecker.getErrors().size();
1216 }
1217
1218 private static boolean isNum(String harmonizedValue) {
1219 try {
1220 Double.parseDouble(harmonizedValue);
1221 return true;
1222 } catch (NumberFormatException e) {
1223 return false;
1224 }
1225 }
1226
1227 private static boolean isFixme(String key, String value) {
1228 return key.toLowerCase(Locale.ENGLISH).contains("fixme") || key.contains("todo")
1229 || value.toLowerCase(Locale.ENGLISH).contains("fixme") || value.contains("check and delete");
1230 }
1231
1232 private static String harmonizeKey(String key) {
1233 return Utils.strip(key.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(':', '_').replace(' ', '_'), "-_;:,");
1234 }
1235
1236 private static String harmonizeValue(String value) {
1237 return Utils.strip(value.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(' ', '_'), "-_;:,");
1238 }
1239
1240 @Override
1241 public void startTest(ProgressMonitor monitor) {
1242 super.startTest(monitor);
1243 super.setShowElements(true);
1244 includeOtherSeverity = includeOtherSeverityChecks();
1245 checkKeys = Config.getPref().getBoolean(PREF_CHECK_KEYS, true);
1246 if (isBeforeUpload) {
1247 checkKeys = checkKeys && Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true);
1248 }
1249
1250 checkValues = Config.getPref().getBoolean(PREF_CHECK_VALUES, true);
1251 if (isBeforeUpload) {
1252 checkValues = checkValues && Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true);
1253 }
1254
1255 checkComplex = Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true);
1256 if (isBeforeUpload) {
1257 checkComplex = checkComplex && Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true);
1258 }
1259
1260 checkFixmes = includeOtherSeverity && Config.getPref().getBoolean(PREF_CHECK_FIXMES, true);
1261 if (isBeforeUpload) {
1262 checkFixmes = checkFixmes && Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true);
1263 }
1264
1265 checkPresetsTypes = includeOtherSeverity && Config.getPref().getBoolean(PREF_CHECK_PRESETS_TYPES, true);
1266 if (isBeforeUpload) {
1267 checkPresetsTypes = checkPresetsTypes && Config.getPref().getBoolean(PREF_CHECK_PRESETS_TYPES_BEFORE_UPLOAD, true);
1268 }
1269
1270 checkRegions = Config.getPref().getBoolean(PREF_CHECK_REGIONS, true);
1271 if (isBeforeUpload) {
1272 checkRegions = checkRegions && Config.getPref().getBoolean(PREF_CHECK_REGIONS_BEFORE_UPLOAD, true);
1273 }
1274 deprecatedChecker = OsmValidator.getTest(MapCSSTagChecker.class);
1275 ignoreForOuterMPSameTagCheck.addAll(Config.getPref().getList(PREF_KEYS_IGNORE_OUTER_MP_SAME_TAG, Collections.emptyList()));
1276 }
1277
1278 @Override
1279 public void endTest() {
1280 deprecatedChecker = null;
1281 super.endTest();
1282 }
1283
1284 @Override
1285 public void visit(Collection<OsmPrimitive> selection) {
1286 if (checkKeys || checkValues || checkComplex || checkFixmes || checkPresetsTypes || checkRegions) {
1287 super.visit(selection);
1288 }
1289 }
1290
1291 @Override
1292 public void addGui(JPanel testPanel) {
1293 GBC a = GBC.eol();
1294 a.anchor = GridBagConstraints.LINE_END;
1295
1296 testPanel.add(new JLabel(name+" :"), GBC.eol().insets(3, 0, 0, 0));
1297
1298 prefCheckKeys = new JCheckBox(tr("Check property keys."), Config.getPref().getBoolean(PREF_CHECK_KEYS, true));
1299 prefCheckKeys.setToolTipText(tr("Validate that property keys are valid checking against list of words."));
1300 testPanel.add(prefCheckKeys, GBC.std().insets(20, 0, 0, 0));
1301
1302 prefCheckKeysBeforeUpload = new JCheckBox();
1303 prefCheckKeysBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true));
1304 testPanel.add(prefCheckKeysBeforeUpload, a);
1305
1306 prefCheckComplex = new JCheckBox(tr("Use complex property checker."), Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true));
1307 prefCheckComplex.setToolTipText(tr("Validate property values and tags using complex rules."));
1308 testPanel.add(prefCheckComplex, GBC.std().insets(20, 0, 0, 0));
1309
1310 prefCheckComplexBeforeUpload = new JCheckBox();
1311 prefCheckComplexBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true));
1312 testPanel.add(prefCheckComplexBeforeUpload, a);
1313
1314 final Collection<String> sources = Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES);
1315 sourcesList = new EditableList(tr("TagChecker source"));
1316 sourcesList.setItems(sources);
1317 testPanel.add(new JLabel(tr("Data sources ({0})", "*.cfg")), GBC.eol().insets(23, 0, 0, 0));
1318 testPanel.add(sourcesList, GBC.eol().fill(GridBagConstraints.HORIZONTAL).insets(23, 0, 0, 0));
1319
1320 ActionListener disableCheckActionListener = e -> handlePrefEnable();
1321 prefCheckKeys.addActionListener(disableCheckActionListener);
1322 prefCheckKeysBeforeUpload.addActionListener(disableCheckActionListener);
1323 prefCheckComplex.addActionListener(disableCheckActionListener);
1324 prefCheckComplexBeforeUpload.addActionListener(disableCheckActionListener);
1325
1326 handlePrefEnable();
1327
1328 prefCheckValues = new JCheckBox(tr("Check property values."), Config.getPref().getBoolean(PREF_CHECK_VALUES, true));
1329 prefCheckValues.setToolTipText(tr("Validate that property values are valid checking against presets."));
1330 testPanel.add(prefCheckValues, GBC.std().insets(20, 0, 0, 0));
1331
1332 prefCheckValuesBeforeUpload = new JCheckBox();
1333 prefCheckValuesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true));
1334 testPanel.add(prefCheckValuesBeforeUpload, a);
1335
1336 prefCheckFixmes = new JCheckBox(tr("Check for fixme."), Config.getPref().getBoolean(PREF_CHECK_FIXMES, true));
1337 prefCheckFixmes.setToolTipText(tr("Looks for nodes or ways with fixme in any property value."));
1338 testPanel.add(prefCheckFixmes, GBC.std().insets(20, 0, 0, 0));
1339
1340 prefCheckFixmesBeforeUpload = new JCheckBox();
1341 prefCheckFixmesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true));
1342 testPanel.add(prefCheckFixmesBeforeUpload, a);
1343
1344 prefCheckPresetsTypes = new JCheckBox(tr("Check for presets types."), Config.getPref().getBoolean(PREF_CHECK_PRESETS_TYPES, true));
1345 prefCheckPresetsTypes.setToolTipText(tr("Validate that objects types are valid checking against presets."));
1346 testPanel.add(prefCheckPresetsTypes, GBC.std().insets(20, 0, 0, 0));
1347
1348 prefCheckPresetsTypesBeforeUpload = new JCheckBox();
1349 prefCheckPresetsTypesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_PRESETS_TYPES_BEFORE_UPLOAD, true));
1350 testPanel.add(prefCheckPresetsTypesBeforeUpload, a);
1351
1352 prefCheckRegions = new JCheckBox(tr("Check for regions."), Config.getPref().getBoolean(PREF_CHECK_REGIONS, true));
1353 prefCheckRegions.setToolTipText(tr("Validate that objects are in the correct region."));
1354 testPanel.add(prefCheckRegions, GBC.std().insets(20, 0, 0, 0));
1355
1356 prefCheckRegionsBeforeUpload = new JCheckBox();
1357 prefCheckRegionsBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_REGIONS_BEFORE_UPLOAD, true));
1358 testPanel.add(prefCheckRegionsBeforeUpload, a);
1359 }
1360
1361 /**
1362 * Enables/disables the source list field
1363 */
1364 public void handlePrefEnable() {
1365 boolean selected = prefCheckKeys.isSelected() || prefCheckKeysBeforeUpload.isSelected()
1366 || prefCheckComplex.isSelected() || prefCheckComplexBeforeUpload.isSelected();
1367 sourcesList.setEnabled(selected);
1368 }
1369
1370 @Override
1371 public boolean ok() {
1372 enabled = prefCheckKeys.isSelected() || prefCheckValues.isSelected() || prefCheckComplex.isSelected() || prefCheckFixmes.isSelected();
1373 testBeforeUpload = prefCheckKeysBeforeUpload.isSelected() || prefCheckValuesBeforeUpload.isSelected()
1374 || prefCheckFixmesBeforeUpload.isSelected() || prefCheckComplexBeforeUpload.isSelected();
1375
1376 Config.getPref().putBoolean(PREF_CHECK_VALUES, prefCheckValues.isSelected());
1377 Config.getPref().putBoolean(PREF_CHECK_COMPLEX, prefCheckComplex.isSelected());
1378 Config.getPref().putBoolean(PREF_CHECK_KEYS, prefCheckKeys.isSelected());
1379 Config.getPref().putBoolean(PREF_CHECK_FIXMES, prefCheckFixmes.isSelected());
1380 Config.getPref().putBoolean(PREF_CHECK_PRESETS_TYPES, prefCheckPresetsTypes.isSelected());
1381 Config.getPref().putBoolean(PREF_CHECK_REGIONS, prefCheckRegions.isSelected());
1382 Config.getPref().putBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, prefCheckValuesBeforeUpload.isSelected());
1383 Config.getPref().putBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, prefCheckComplexBeforeUpload.isSelected());
1384 Config.getPref().putBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, prefCheckKeysBeforeUpload.isSelected());
1385 Config.getPref().putBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, prefCheckFixmesBeforeUpload.isSelected());
1386 Config.getPref().putBoolean(PREF_CHECK_PRESETS_TYPES_BEFORE_UPLOAD, prefCheckPresetsTypesBeforeUpload.isSelected());
1387 Config.getPref().putBoolean(PREF_CHECK_REGIONS_BEFORE_UPLOAD, prefCheckRegionsBeforeUpload.isSelected());
1388 return Config.getPref().putList(PREF_SOURCES, sourcesList.getItems());
1389 }
1390
1391 @Override
1392 public Command fixError(TestError testError) {
1393 List<Command> commands = new ArrayList<>(50);
1394
1395 Collection<? extends OsmPrimitive> primitives = testError.getPrimitives();
1396 for (OsmPrimitive p : primitives) {
1397 Map<String, String> tags = p.getKeys();
1398 if (tags.isEmpty()) {
1399 continue;
1400 }
1401
1402 for (Entry<String, String> prop: tags.entrySet()) {
1403 String key = prop.getKey();
1404 String value = prop.getValue();
1405 if (Utils.isBlank(value)) {
1406 commands.add(new ChangePropertyCommand(p, key, null));
1407 } else if (value.startsWith(" ") || value.endsWith(" ") || value.contains(" ")) {
1408 commands.add(new ChangePropertyCommand(p, key, Utils.removeWhiteSpaces(value)));
1409 } else if (key.startsWith(" ") || key.endsWith(" ") || key.contains(" ")) {
1410 commands.add(new ChangePropertyKeyCommand(p, key, Utils.removeWhiteSpaces(key)));
1411 } else {
1412 String evalue = Entities.unescape(value);
1413 if (!evalue.equals(value)) {
1414 commands.add(new ChangePropertyCommand(p, key, evalue));
1415 }
1416 }
1417 }
1418 }
1419
1420 if (commands.isEmpty())
1421 return null;
1422 if (commands.size() == 1)
1423 return commands.get(0);
1424
1425 return new SequenceCommand(tr("Fix tags"), commands);
1426 }
1427
1428 @Override
1429 public boolean isFixable(TestError testError) {
1430 if (testError.getTester() instanceof TagChecker) {
1431 int code = testError.getCode();
1432 return code == EMPTY_VALUES || code == INVALID_SPACE ||
1433 code == INVALID_KEY_SPACE || code == INVALID_HTML ||
1434 code == MULTIPLE_SPACES;
1435 }
1436
1437 return false;
1438 }
1439
1440 @Override
1441 public void taggingPresetsModified() {
1442 try {
1443 initializeData();
1444 initializePresets();
1445 analysePresets();
1446 } catch (IOException e) {
1447 Logging.error(e);
1448 }
1449 }
1450}
Note: See TracBrowser for help on using the repository browser.