1 | // License: GPL. For details, see LICENSE file.
|
---|
2 | package org.openstreetmap.josm.tools;
|
---|
3 |
|
---|
4 | import static org.openstreetmap.josm.tools.I18n.tr;
|
---|
5 | import static org.openstreetmap.josm.tools.I18n.trn;
|
---|
6 |
|
---|
7 | import java.util.Arrays;
|
---|
8 | import java.util.HashMap;
|
---|
9 | import java.util.Map;
|
---|
10 | import java.util.Map.Entry;
|
---|
11 | import java.util.regex.Matcher;
|
---|
12 | import java.util.regex.Pattern;
|
---|
13 |
|
---|
14 | import org.openstreetmap.josm.Main;
|
---|
15 |
|
---|
16 | /**
|
---|
17 | * Class that helps to parse tags from arbitrary text
|
---|
18 | */
|
---|
19 | public final class TextTagParser {
|
---|
20 |
|
---|
21 | // properties need JOSM restart to apply, modified rarely enough
|
---|
22 | private static final int MAX_KEY_LENGTH = Main.pref.getInteger("tags.paste.max-key-length", 50);
|
---|
23 | private static final int MAX_KEY_COUNT = Main.pref.getInteger("tags.paste.max-key-count", 30);
|
---|
24 | private static final String KEY_PATTERN = Main.pref.get("tags.paste.tag-pattern", "[0-9a-zA-Z:_]*");
|
---|
25 | private static final int MAX_VALUE_LENGTH = 255;
|
---|
26 |
|
---|
27 | private TextTagParser() {
|
---|
28 | // Hide default constructor for utils classes
|
---|
29 | }
|
---|
30 |
|
---|
31 | /**
|
---|
32 | * A helper class that analyzes the text and attempts to parse tags from it
|
---|
33 | */
|
---|
34 | public static class TextAnalyzer {
|
---|
35 | private boolean quotesStarted;
|
---|
36 | private boolean esc;
|
---|
37 | private final StringBuilder s = new StringBuilder(200);
|
---|
38 | private int pos;
|
---|
39 | private final String data;
|
---|
40 | private final int n;
|
---|
41 |
|
---|
42 | /**
|
---|
43 | * Create a new {@link TextAnalyzer}
|
---|
44 | * @param text The text to parse
|
---|
45 | */
|
---|
46 | public TextAnalyzer(String text) {
|
---|
47 | pos = 0;
|
---|
48 | data = text;
|
---|
49 | n = data.length();
|
---|
50 | }
|
---|
51 |
|
---|
52 | /**
|
---|
53 | * Read tags from "Free format"
|
---|
54 | * @return map of tags
|
---|
55 | */
|
---|
56 | private Map<String, String> getFreeParsedTags() {
|
---|
57 | String k, v;
|
---|
58 | Map<String, String> tags = new HashMap<>();
|
---|
59 |
|
---|
60 | while (true) {
|
---|
61 | skipEmpty();
|
---|
62 | if (pos == n) {
|
---|
63 | break;
|
---|
64 | }
|
---|
65 | k = parseString("\n\r\t= ");
|
---|
66 | if (pos == n) {
|
---|
67 | tags.clear();
|
---|
68 | break;
|
---|
69 | }
|
---|
70 | skipSign();
|
---|
71 | if (pos == n) {
|
---|
72 | tags.clear();
|
---|
73 | break;
|
---|
74 | }
|
---|
75 | v = parseString("\n\r\t ");
|
---|
76 | tags.put(k, v);
|
---|
77 | }
|
---|
78 | return tags;
|
---|
79 | }
|
---|
80 |
|
---|
81 | private String parseString(String stopChars) {
|
---|
82 | char[] stop = stopChars.toCharArray();
|
---|
83 | Arrays.sort(stop);
|
---|
84 | char c;
|
---|
85 | while (pos < n) {
|
---|
86 | c = data.charAt(pos);
|
---|
87 | if (esc) {
|
---|
88 | esc = false;
|
---|
89 | s.append(c); // \" \\
|
---|
90 | } else if (c == '\\') {
|
---|
91 | esc = true;
|
---|
92 | } else if (c == '\"' && !quotesStarted) { // opening "
|
---|
93 | if (!s.toString().trim().isEmpty()) { // we had ||some text"||
|
---|
94 | s.append(c); // just add ", not open
|
---|
95 | } else {
|
---|
96 | s.delete(0, s.length()); // forget that empty characthers and start reading "....
|
---|
97 | quotesStarted = true;
|
---|
98 | }
|
---|
99 | } else if (c == '\"' && quotesStarted) { // closing "
|
---|
100 | quotesStarted = false;
|
---|
101 | pos++;
|
---|
102 | break;
|
---|
103 | } else if (!quotesStarted && (Arrays.binarySearch(stop, c) >= 0)) {
|
---|
104 | // stop-symbol found
|
---|
105 | pos++;
|
---|
106 | break;
|
---|
107 | } else {
|
---|
108 | // skip non-printable characters
|
---|
109 | if (c >= 32) s.append(c);
|
---|
110 | }
|
---|
111 | pos++;
|
---|
112 | }
|
---|
113 |
|
---|
114 | String res = s.toString();
|
---|
115 | s.delete(0, s.length());
|
---|
116 | return res.trim();
|
---|
117 | }
|
---|
118 |
|
---|
119 | private void skipSign() {
|
---|
120 | char c;
|
---|
121 | boolean signFound = false;
|
---|
122 | while (pos < n) {
|
---|
123 | c = data.charAt(pos);
|
---|
124 | if (c == '\t' || c == '\n' || c == ' ') {
|
---|
125 | pos++;
|
---|
126 | } else if (c == '=') {
|
---|
127 | if (signFound) break; // a = =qwerty means "a"="=qwerty"
|
---|
128 | signFound = true;
|
---|
129 | pos++;
|
---|
130 | } else {
|
---|
131 | break;
|
---|
132 | }
|
---|
133 | }
|
---|
134 | }
|
---|
135 |
|
---|
136 | private void skipEmpty() {
|
---|
137 | char c;
|
---|
138 | while (pos < n) {
|
---|
139 | c = data.charAt(pos);
|
---|
140 | if (c == '\t' || c == '\n' || c == '\r' || c == ' ') {
|
---|
141 | pos++;
|
---|
142 | } else {
|
---|
143 | break;
|
---|
144 | }
|
---|
145 | }
|
---|
146 | }
|
---|
147 | }
|
---|
148 |
|
---|
149 | static String unescape(String k) {
|
---|
150 | if (!(k.startsWith("\"") && k.endsWith("\""))) {
|
---|
151 | if (k.contains("=")) {
|
---|
152 | // '=' not in quotes will be treated as an error!
|
---|
153 | return null;
|
---|
154 | } else {
|
---|
155 | return k;
|
---|
156 | }
|
---|
157 | }
|
---|
158 | String text = k.substring(1, k.length()-1);
|
---|
159 | return (new TextAnalyzer(text)).parseString("\r\t\n");
|
---|
160 | }
|
---|
161 |
|
---|
162 | /**
|
---|
163 | * Try to find tag-value pairs in given text
|
---|
164 | * @param text - text in which tags are looked for
|
---|
165 | * @param splitRegex - text is splitted into parts with this delimiter
|
---|
166 | * @param tagRegex - each part is matched against this regex
|
---|
167 | * @param unescapeTextInQuotes - if true, matched tag and value will be analyzed more thoroughly
|
---|
168 | * @return map of tags
|
---|
169 | */
|
---|
170 | public static Map<String, String> readTagsByRegexp(String text, String splitRegex, String tagRegex, boolean unescapeTextInQuotes) {
|
---|
171 | String[] lines = text.split(splitRegex);
|
---|
172 | Pattern p = Pattern.compile(tagRegex);
|
---|
173 | Map<String, String> tags = new HashMap<>();
|
---|
174 | String k;
|
---|
175 | String v;
|
---|
176 | for (String line: lines) {
|
---|
177 | if (line.trim().isEmpty()) continue; // skip empty lines
|
---|
178 | Matcher m = p.matcher(line);
|
---|
179 | if (m.matches()) {
|
---|
180 | k = m.group(1).trim();
|
---|
181 | v = m.group(2).trim();
|
---|
182 | if (unescapeTextInQuotes) {
|
---|
183 | k = unescape(k);
|
---|
184 | v = unescape(v);
|
---|
185 | if (k == null || v == null) return null;
|
---|
186 | }
|
---|
187 | tags.put(k, v);
|
---|
188 | } else {
|
---|
189 | return null;
|
---|
190 | }
|
---|
191 | }
|
---|
192 | if (!tags.isEmpty()) {
|
---|
193 | return tags;
|
---|
194 | } else {
|
---|
195 | return null;
|
---|
196 | }
|
---|
197 | }
|
---|
198 |
|
---|
199 | /**
|
---|
200 | * Gets a list of tags that are in the given text
|
---|
201 | * @param buf The text to parse
|
---|
202 | * @param callback warning callback
|
---|
203 | * @return The tags or <code>null</code> if the tags are not valid
|
---|
204 | * @since 12683
|
---|
205 | */
|
---|
206 | public static Map<String, String> getValidatedTagsFromText(String buf, TagWarningCallback callback) {
|
---|
207 | Map<String, String> tags = readTagsFromText(buf);
|
---|
208 | return validateTags(tags, callback) ? tags : null;
|
---|
209 | }
|
---|
210 |
|
---|
211 | /**
|
---|
212 | * Apply different methods to extract tag-value pairs from arbitrary text
|
---|
213 | * @param buf buffer
|
---|
214 | * @return null if no format is suitable
|
---|
215 | */
|
---|
216 | public static Map<String, String> readTagsFromText(String buf) {
|
---|
217 | Map<String, String> tags;
|
---|
218 |
|
---|
219 | // Format
|
---|
220 | // tag1\tval1\ntag2\tval2\n
|
---|
221 | tags = readTagsByRegexp(buf, "[\\r\\n]+", ".*?([a-zA-Z0-9:_]+).*\\t(.*?)", false);
|
---|
222 | // try "tag\tvalue\n" format
|
---|
223 | if (tags != null) return tags;
|
---|
224 |
|
---|
225 | // Format
|
---|
226 | // a=b \n c=d \n "a b"=hello
|
---|
227 | // SORRY: "a=b" = c is not supported fror now, only first = will be considered
|
---|
228 | // a = "b=c" is OK
|
---|
229 | // a = b=c - this method of parsing fails intentionally
|
---|
230 | tags = readTagsByRegexp(buf, "[\\n\\t\\r]+", "(.*?)=(.*?)", true);
|
---|
231 | // try format t1=v1\n t2=v2\n ...
|
---|
232 | if (tags != null) return tags;
|
---|
233 |
|
---|
234 | // JSON-format
|
---|
235 | String bufJson = buf.trim();
|
---|
236 | // trim { }, if there are any
|
---|
237 | if (bufJson.startsWith("{") && bufJson.endsWith("}"))
|
---|
238 | bufJson = bufJson.substring(1, bufJson.length()-1);
|
---|
239 | tags = readTagsByRegexp(bufJson, "[\\s]*,[\\s]*",
|
---|
240 | "[\\s]*(\\\".*?[^\\\\]\\\")"+"[\\s]*:[\\s]*"+"(\\\".*?[^\\\\]\\\")[\\s]*", true);
|
---|
241 | if (tags != null) return tags;
|
---|
242 |
|
---|
243 | // Free format
|
---|
244 | // a 1 "b" 2 c=3 d 4 e "5"
|
---|
245 | return new TextAnalyzer(buf).getFreeParsedTags();
|
---|
246 | }
|
---|
247 |
|
---|
248 | /**
|
---|
249 | * Check tags for correctness and display warnings if needed
|
---|
250 | * @param tags - map key->value to check
|
---|
251 | * @param callback warning callback
|
---|
252 | * @return true if the tags should be pasted
|
---|
253 | * @since 12683
|
---|
254 | */
|
---|
255 | public static boolean validateTags(Map<String, String> tags, TagWarningCallback callback) {
|
---|
256 | int r;
|
---|
257 | int s = tags.size();
|
---|
258 | if (s > MAX_KEY_COUNT) {
|
---|
259 | // Use trn() even if for english it makes no sense, as s > 30
|
---|
260 | r = callback.warning(trn("There was {0} tag found in the buffer, it is suspicious!",
|
---|
261 | "There were {0} tags found in the buffer, it is suspicious!", s,
|
---|
262 | s), "", "tags.paste.toomanytags");
|
---|
263 | if (r == 2 || r == 3) return false; if (r == 4) return true;
|
---|
264 | }
|
---|
265 | for (Entry<String, String> entry : tags.entrySet()) {
|
---|
266 | String key = entry.getKey();
|
---|
267 | String value = entry.getValue();
|
---|
268 | if (key.length() > MAX_KEY_LENGTH) {
|
---|
269 | r = callback.warning(tr("Key is too long (max {0} characters):", MAX_KEY_LENGTH), key+'='+value, "tags.paste.keytoolong");
|
---|
270 | if (r == 2 || r == 3) return false; if (r == 4) return true;
|
---|
271 | }
|
---|
272 | if (!key.matches(KEY_PATTERN)) {
|
---|
273 | r = callback.warning(tr("Suspicious characters in key:"), key, "tags.paste.keydoesnotmatch");
|
---|
274 | if (r == 2 || r == 3) return false; if (r == 4) return true;
|
---|
275 | }
|
---|
276 | if (value.length() > MAX_VALUE_LENGTH) {
|
---|
277 | r = callback.warning(tr("Value is too long (max {0} characters):", MAX_VALUE_LENGTH), value, "tags.paste.valuetoolong");
|
---|
278 | if (r == 2 || r == 3) return false; if (r == 4) return true;
|
---|
279 | }
|
---|
280 | }
|
---|
281 | return true;
|
---|
282 | }
|
---|
283 |
|
---|
284 | /**
|
---|
285 | * Called when a problematic tag is encountered.
|
---|
286 | * @since 12683
|
---|
287 | */
|
---|
288 | @FunctionalInterface
|
---|
289 | public interface TagWarningCallback {
|
---|
290 | /**
|
---|
291 | * Displays a warning about a problematic tag and ask user what to do about it.
|
---|
292 | * @param text Message to display
|
---|
293 | * @param data Tag key and/or value
|
---|
294 | * @param code to use with {@code ExtendedDialog#toggleEnable(String)}
|
---|
295 | * @return 1 to validate and display next warnings if any, 2 to cancel operation, 3 to clear buffer, 4 to paste tags
|
---|
296 | */
|
---|
297 | int warning(String text, String data, String code);
|
---|
298 | }
|
---|
299 | }
|
---|