source: josm/trunk/src/org/openstreetmap/josm/tools/TextTagParser.java@ 13504

Last change on this file since 13504 was 12846, checked in by bastiK, 7 years ago

see #15229 - use Config.getPref() wherever possible

  • Property svn:eol-style set to native
File size: 10.6 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4import static org.openstreetmap.josm.tools.I18n.tr;
5import static org.openstreetmap.josm.tools.I18n.trn;
6
7import java.util.Arrays;
8import java.util.HashMap;
9import java.util.Map;
10import java.util.Map.Entry;
11import java.util.regex.Matcher;
12import java.util.regex.Pattern;
13
14import org.openstreetmap.josm.spi.preferences.Config;
15
16/**
17 * Class that helps to parse tags from arbitrary text
18 */
19public final class TextTagParser {
20
21 // properties need JOSM restart to apply, modified rarely enough
22 private static final int MAX_KEY_LENGTH = Config.getPref().getInt("tags.paste.max-key-length", 50);
23 private static final int MAX_KEY_COUNT = Config.getPref().getInt("tags.paste.max-key-count", 30);
24 private static final String KEY_PATTERN = Config.getPref().get("tags.paste.tag-pattern", "[0-9a-zA-Z:_]*");
25 private static final int MAX_VALUE_LENGTH = 255;
26
private TextTagParser() {
    // Utility class with static members only: never instantiated
}
30
31 /**
32 * A helper class that analyzes the text and attempts to parse tags from it
33 */
34 public static class TextAnalyzer {
35 private boolean quotesStarted;
36 private boolean esc;
37 private final StringBuilder s = new StringBuilder(200);
38 private int pos;
39 private final String data;
40 private final int n;
41
42 /**
43 * Create a new {@link TextAnalyzer}
44 * @param text The text to parse
45 */
46 public TextAnalyzer(String text) {
47 pos = 0;
48 data = text;
49 n = data.length();
50 }
51
52 /**
53 * Read tags from "Free format"
54 * @return map of tags
55 */
56 private Map<String, String> getFreeParsedTags() {
57 String k, v;
58 Map<String, String> tags = new HashMap<>();
59
60 while (true) {
61 skipEmpty();
62 if (pos == n) {
63 break;
64 }
65 k = parseString("\n\r\t= ");
66 if (pos == n) {
67 tags.clear();
68 break;
69 }
70 skipSign();
71 if (pos == n) {
72 tags.clear();
73 break;
74 }
75 v = parseString("\n\r\t ");
76 tags.put(k, v);
77 }
78 return tags;
79 }
80
81 private String parseString(String stopChars) {
82 char[] stop = stopChars.toCharArray();
83 Arrays.sort(stop);
84 char c;
85 while (pos < n) {
86 c = data.charAt(pos);
87 if (esc) {
88 esc = false;
89 s.append(c); // \" \\
90 } else if (c == '\\') {
91 esc = true;
92 } else if (c == '\"' && !quotesStarted) { // opening "
93 if (!s.toString().trim().isEmpty()) { // we had ||some text"||
94 s.append(c); // just add ", not open
95 } else {
96 s.delete(0, s.length()); // forget that empty characthers and start reading "....
97 quotesStarted = true;
98 }
99 } else if (c == '\"' && quotesStarted) { // closing "
100 quotesStarted = false;
101 pos++;
102 break;
103 } else if (!quotesStarted && (Arrays.binarySearch(stop, c) >= 0)) {
104 // stop-symbol found
105 pos++;
106 break;
107 } else {
108 // skip non-printable characters
109 if (c >= 32) s.append(c);
110 }
111 pos++;
112 }
113
114 String res = s.toString();
115 s.delete(0, s.length());
116 return res.trim();
117 }
118
119 private void skipSign() {
120 char c;
121 boolean signFound = false;
122 while (pos < n) {
123 c = data.charAt(pos);
124 if (c == '\t' || c == '\n' || c == ' ') {
125 pos++;
126 } else if (c == '=') {
127 if (signFound) break; // a = =qwerty means "a"="=qwerty"
128 signFound = true;
129 pos++;
130 } else {
131 break;
132 }
133 }
134 }
135
136 private void skipEmpty() {
137 char c;
138 while (pos < n) {
139 c = data.charAt(pos);
140 if (c == '\t' || c == '\n' || c == '\r' || c == ' ') {
141 pos++;
142 } else {
143 break;
144 }
145 }
146 }
147 }
148
149 static String unescape(String k) {
150 if (!(k.startsWith("\"") && k.endsWith("\""))) {
151 if (k.contains("=")) {
152 // '=' not in quotes will be treated as an error!
153 return null;
154 } else {
155 return k;
156 }
157 }
158 String text = k.substring(1, k.length()-1);
159 return (new TextAnalyzer(text)).parseString("\r\t\n");
160 }
161
162 /**
163 * Try to find tag-value pairs in given text
164 * @param text - text in which tags are looked for
165 * @param splitRegex - text is splitted into parts with this delimiter
166 * @param tagRegex - each part is matched against this regex
167 * @param unescapeTextInQuotes - if true, matched tag and value will be analyzed more thoroughly
168 * @return map of tags
169 */
170 public static Map<String, String> readTagsByRegexp(String text, String splitRegex, String tagRegex, boolean unescapeTextInQuotes) {
171 String[] lines = text.split(splitRegex);
172 Pattern p = Pattern.compile(tagRegex);
173 Map<String, String> tags = new HashMap<>();
174 String k;
175 String v;
176 for (String line: lines) {
177 if (line.trim().isEmpty()) continue; // skip empty lines
178 Matcher m = p.matcher(line);
179 if (m.matches()) {
180 k = m.group(1).trim();
181 v = m.group(2).trim();
182 if (unescapeTextInQuotes) {
183 k = unescape(k);
184 v = unescape(v);
185 if (k == null || v == null) return null;
186 }
187 tags.put(k, v);
188 } else {
189 return null;
190 }
191 }
192 if (!tags.isEmpty()) {
193 return tags;
194 } else {
195 return null;
196 }
197 }
198
199 /**
200 * Gets a list of tags that are in the given text
201 * @param buf The text to parse
202 * @param callback warning callback
203 * @return The tags or <code>null</code> if the tags are not valid
204 * @since 12683
205 */
206 public static Map<String, String> getValidatedTagsFromText(String buf, TagWarningCallback callback) {
207 Map<String, String> tags = readTagsFromText(buf);
208 return validateTags(tags, callback) ? tags : null;
209 }
210
211 /**
212 * Apply different methods to extract tag-value pairs from arbitrary text
213 * @param buf buffer
214 * @return null if no format is suitable
215 */
216 public static Map<String, String> readTagsFromText(String buf) {
217 Map<String, String> tags;
218
219 // Format
220 // tag1\tval1\ntag2\tval2\n
221 tags = readTagsByRegexp(buf, "[\\r\\n]+", ".*?([a-zA-Z0-9:_]+).*\\t(.*?)", false);
222 // try "tag\tvalue\n" format
223 if (tags != null) return tags;
224
225 // Format
226 // a=b \n c=d \n "a b"=hello
227 // SORRY: "a=b" = c is not supported fror now, only first = will be considered
228 // a = "b=c" is OK
229 // a = b=c - this method of parsing fails intentionally
230 tags = readTagsByRegexp(buf, "[\\n\\t\\r]+", "(.*?)=(.*?)", true);
231 // try format t1=v1\n t2=v2\n ...
232 if (tags != null) return tags;
233
234 // JSON-format
235 String bufJson = buf.trim();
236 // trim { }, if there are any
237 if (bufJson.startsWith("{") && bufJson.endsWith("}"))
238 bufJson = bufJson.substring(1, bufJson.length()-1);
239 tags = readTagsByRegexp(bufJson, "[\\s]*,[\\s]*",
240 "[\\s]*(\\\".*?[^\\\\]\\\")"+"[\\s]*:[\\s]*"+"(\\\".*?[^\\\\]\\\")[\\s]*", true);
241 if (tags != null) return tags;
242
243 // Free format
244 // a 1 "b" 2 c=3 d 4 e "5"
245 return new TextAnalyzer(buf).getFreeParsedTags();
246 }
247
248 /**
249 * Check tags for correctness and display warnings if needed
250 * @param tags - map key-&gt;value to check
251 * @param callback warning callback
252 * @return true if the tags should be pasted
253 * @since 12683
254 */
255 public static boolean validateTags(Map<String, String> tags, TagWarningCallback callback) {
256 int r;
257 int s = tags.size();
258 if (s > MAX_KEY_COUNT) {
259 // Use trn() even if for english it makes no sense, as s > 30
260 r = callback.warning(trn("There was {0} tag found in the buffer, it is suspicious!",
261 "There were {0} tags found in the buffer, it is suspicious!", s,
262 s), "", "tags.paste.toomanytags");
263 if (r == 2 || r == 3) return false; if (r == 4) return true;
264 }
265 for (Entry<String, String> entry : tags.entrySet()) {
266 String key = entry.getKey();
267 String value = entry.getValue();
268 if (key.length() > MAX_KEY_LENGTH) {
269 r = callback.warning(tr("Key is too long (max {0} characters):", MAX_KEY_LENGTH), key+'='+value, "tags.paste.keytoolong");
270 if (r == 2 || r == 3) return false; if (r == 4) return true;
271 }
272 if (!key.matches(KEY_PATTERN)) {
273 r = callback.warning(tr("Suspicious characters in key:"), key, "tags.paste.keydoesnotmatch");
274 if (r == 2 || r == 3) return false; if (r == 4) return true;
275 }
276 if (value.length() > MAX_VALUE_LENGTH) {
277 r = callback.warning(tr("Value is too long (max {0} characters):", MAX_VALUE_LENGTH), value, "tags.paste.valuetoolong");
278 if (r == 2 || r == 3) return false; if (r == 4) return true;
279 }
280 }
281 return true;
282 }
283
/**
 * Callback that is notified whenever a problematic tag is encountered.
 * @since 12683
 */
@FunctionalInterface
public interface TagWarningCallback {
    /**
     * Shows a warning about a problematic tag and asks the user how to proceed.
     * @param text Message to display
     * @param data Tag key and/or value
     * @param code to use with {@code ExtendedDialog#toggleEnable(String)}
     * @return the user's decision:
     *         1 to validate and display next warnings if any,
     *         2 to cancel operation,
     *         3 to clear buffer,
     *         4 to paste tags
     */
    int warning(String text, String data, String code);
}
299}
Note: See TracBrowser for help on using the repository browser.