source: josm/trunk/src/org/openstreetmap/josm/tools/TextTagParser.java@ 8487

Last change on this file since 8487 was 8482, checked in by simon04, 9 years ago

Correctly parse tab-separated tags

… by using non-greedy regular expression for prefix to ignore

  • Property svn:eol-style set to native
File size: 11.7 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4import static org.openstreetmap.josm.tools.I18n.tr;
5import static org.openstreetmap.josm.tools.I18n.trn;
6
7import java.awt.GridBagLayout;
8import java.util.Arrays;
9import java.util.HashMap;
10import java.util.Map;
11import java.util.Map.Entry;
12import java.util.regex.Matcher;
13import java.util.regex.Pattern;
14
15import javax.swing.JLabel;
16import javax.swing.JOptionPane;
17import javax.swing.JPanel;
18
19import org.openstreetmap.josm.Main;
20import org.openstreetmap.josm.gui.ExtendedDialog;
21import org.openstreetmap.josm.gui.help.HelpUtil;
22import org.openstreetmap.josm.gui.widgets.UrlLabel;
23import org.openstreetmap.josm.io.XmlWriter;
24import org.openstreetmap.josm.tools.LanguageInfo.LocaleType;
25
26/**
27 * Class that helps to parse tags from arbitrary text
28 */
29public final class TextTagParser {
30
31 // properties need JOSM restart to apply, modified rarely enough
32 private static final int MAX_KEY_LENGTH = Main.pref.getInteger("tags.paste.max-key-length", 50);
33 private static final int MAX_KEY_COUNT = Main.pref.getInteger("tags.paste.max-key-count", 30);
34 private static final String KEY_PATTERN = Main.pref.get("tags.paste.tag-pattern", "[0-9a-zA-Z:_]*");
35 private static final int MAX_VALUE_LENGTH = 255;
36
/**
 * Private constructor: this utility class only offers static members
 * and is not meant to be instantiated.
 */
private TextTagParser() {
}
40
41 public static class TextAnalyzer {
42 private boolean quotesStarted = false;
43 private boolean esc = false;
44 private StringBuilder s = new StringBuilder(200);
45 private int pos;
46 private String data;
47 private int n;
48
49 public TextAnalyzer(String text) {
50 pos = 0;
51 data = text;
52 n = data.length();
53 }
54
55 /**
56 * Read tags from "Free format"
57 */
58 Map<String, String> getFreeParsedTags() {
59 String k, v;
60 Map<String, String> tags = new HashMap<>();
61
62 while (true) {
63 skipEmpty();
64 if (pos == n) { break; }
65 k = parseString("\n\r\t= ");
66 if (pos == n) { tags.clear(); break; }
67 skipSign();
68 if (pos == n) { tags.clear(); break; }
69 v = parseString("\n\r\t ");
70 tags.put(k, v);
71 }
72 return tags;
73 }
74
75 private String parseString(String stopChars) {
76 char[] stop = stopChars.toCharArray();
77 Arrays.sort(stop);
78 char c;
79 while (pos < n) {
80 c = data.charAt(pos);
81 if (esc) {
82 esc = false;
83 s.append(c); // \" \\
84 } else if (c == '\\') {
85 esc = true;
86 } else if (c == '\"' && !quotesStarted) { // opening "
87 if (!s.toString().trim().isEmpty()) { // we had ||some text"||
88 s.append(c); // just add ", not open
89 } else {
90 s.delete(0, s.length()); // forget that empty characthers and start reading "....
91 quotesStarted = true;
92 }
93 } else if (c == '\"' && quotesStarted) { // closing "
94 quotesStarted = false;
95 pos++;
96 break;
97 } else if (!quotesStarted && (Arrays.binarySearch(stop, c)>=0)) {
98 // stop-symbol found
99 pos++;
100 break;
101 } else {
102 // skip non-printable characters
103 if(c>=32) s.append(c);
104 }
105 pos++;
106 }
107
108 String res = s.toString();
109 s.delete(0, s.length());
110 return res.trim();
111 }
112
113 private void skipSign() {
114 char c;
115 boolean signFound = false;
116 while (pos < n) {
117 c = data.charAt(pos);
118 if (c == '\t' || c == '\n' || c == ' ') {
119 pos++;
120 } else if (c== '=') {
121 if (signFound) break; // a = =qwerty means "a"="=qwerty"
122 signFound = true;
123 pos++;
124 } else {
125 break;
126 }
127 }
128 }
129
130 private void skipEmpty() {
131 char c;
132 while (pos < n) {
133 c = data.charAt(pos);
134 if (c == '\t' || c == '\n' || c == '\r' || c == ' ') {
135 pos++;
136 } else {
137 break;
138 }
139 }
140 }
141 }
142
143 protected static String unescape(String k) {
144 if (!(k.startsWith("\"") && k.endsWith("\""))) {
145 if (k.contains("=")) {
146 // '=' not in quotes will be treated as an error!
147 return null;
148 } else {
149 return k;
150 }
151 }
152 String text = k.substring(1,k.length()-1);
153 return (new TextAnalyzer(text)).parseString("\r\t\n");
154 }
155
156 /**
157 * Try to find tag-value pairs in given text
158 * @param text - text in which tags are looked for
159 * @param splitRegex - text is splitted into parts with this delimiter
160 * @param tagRegex - each part is matched against this regex
161 * @param unescapeTextInQuotes - if true, matched tag and value will be analyzed more thoroughly
162 */
163 public static Map<String, String> readTagsByRegexp(String text, String splitRegex, String tagRegex, boolean unescapeTextInQuotes) {
164 String[] lines = text.split(splitRegex);
165 Pattern p = Pattern.compile(tagRegex);
166 Map<String, String> tags = new HashMap<>();
167 String k=null, v=null;
168 for (String line: lines) {
169 if (line.trim().isEmpty()) continue; // skip empty lines
170 Matcher m = p.matcher(line);
171 if (m.matches()) {
172 k=m.group(1).trim(); v=m.group(2).trim();
173 if (unescapeTextInQuotes) {
174 k = unescape(k);
175 v = unescape(v);
176 if (k==null || v==null) return null;
177 }
178 tags.put(k,v);
179 } else {
180 return null;
181 }
182 }
183 if (!tags.isEmpty()) {
184 return tags;
185 } else {
186 return null;
187 }
188 }
189
190 public static Map<String,String> getValidatedTagsFromText(String buf) {
191 Map<String,String> tags = readTagsFromText(buf);
192 return validateTags(tags) ? tags : null;
193 }
194
195 /**
196 * Apply different methods to extract tag-value pairs from arbitrary text
197 * @param buf buffer
198 * @return null if no format is suitable
199 */
200 public static Map<String,String> readTagsFromText(String buf) {
201 Map<String,String> tags;
202
203 // Format
204 // tag1\tval1\ntag2\tval2\n
205 tags = readTagsByRegexp(buf, "[\\r\\n]+", ".*?([a-zA-Z0-9:_]+).*\\t(.*?)", false);
206 // try "tag\tvalue\n" format
207 if (tags!=null) return tags;
208
209 // Format
210 // a=b \n c=d \n "a b"=hello
211 // SORRY: "a=b" = c is not supported fror now, only first = will be considered
212 // a = "b=c" is OK
213 // a = b=c - this method of parsing fails intentionally
214 tags = readTagsByRegexp(buf, "[\\n\\t\\r]+", "(.*?)=(.*?)", true);
215 // try format t1=v1\n t2=v2\n ...
216 if (tags!=null) return tags;
217
218 // JSON-format
219 String bufJson = buf.trim();
220 // trim { }, if there are any
221 if (bufJson.startsWith("{") && bufJson.endsWith("}"))
222 bufJson = bufJson.substring(1, bufJson.length()-1);
223 tags = readTagsByRegexp(bufJson, "[\\s]*,[\\s]*",
224 "[\\s]*(\\\".*?[^\\\\]\\\")"+"[\\s]*:[\\s]*"+"(\\\".*?[^\\\\]\\\")[\\s]*", true);
225 if (tags!=null) return tags;
226
227 // Free format
228 // a 1 "b" 2 c=3 d 4 e "5"
229 return new TextAnalyzer(buf).getFreeParsedTags();
230 }
231
232 /**
233 * Check tags for correctness and display warnings if needed
234 * @param tags - map key-&gt;value to check
235 * @return true if the tags should be pasted
236 */
237 public static boolean validateTags(Map<String, String> tags) {
238 int r;
239 int s = tags.size();
240 if (s > MAX_KEY_COUNT) {
241 // Use trn() even if for english it makes no sense, as s > 30
242 r=warning(trn("There was {0} tag found in the buffer, it is suspicious!",
243 "There were {0} tags found in the buffer, it is suspicious!", s,
244 s), "", "tags.paste.toomanytags");
245 if (r==2 || r==3) return false; if (r==4) return true;
246 }
247 for (Entry<String, String> entry : tags.entrySet()) {
248 String key = entry.getKey();
249 String value = entry.getValue();
250 if (key.length() > MAX_KEY_LENGTH) {
251 r = warning(tr("Key is too long (max {0} characters):", MAX_KEY_LENGTH), key+"="+value, "tags.paste.keytoolong");
252 if (r==2 || r==3) return false; if (r==4) return true;
253 }
254 if (!key.matches(KEY_PATTERN)) {
255 r = warning(tr("Suspicious characters in key:"), key, "tags.paste.keydoesnotmatch");
256 if (r==2 || r==3) return false; if (r==4) return true;
257 }
258 if (value.length() > MAX_VALUE_LENGTH) {
259 r = warning(tr("Value is too long (max {0} characters):", MAX_VALUE_LENGTH), value, "tags.paste.valuetoolong");
260 if (r==2 || r==3) return false; if (r==4) return true;
261 }
262 }
263 return true;
264 }
265
266 private static int warning(String text, String data, String code) {
267 ExtendedDialog ed = new ExtendedDialog(
268 Main.parent,
269 tr("Do you want to paste these tags?"),
270 new String[]{tr("Ok"), tr("Cancel"), tr("Clear buffer"), tr("Ignore warnings")});
271 ed.setButtonIcons(new String[]{"ok", "cancel", "dialogs/delete", "pastetags"});
272 ed.setContent("<html><b>"+text + "</b><br/><br/><div width=\"300px\">"+XmlWriter.encode(data,true)+"</html>");
273 ed.setDefaultButton(2);
274 ed.setCancelButton(2);
275 ed.setIcon(JOptionPane.WARNING_MESSAGE);
276 ed.toggleEnable(code);
277 ed.showDialog();
278 int r = ed.getValue();
279 if (r==0) r = 2;
280 // clean clipboard if user asked
281 if (r==3) Utils.copyToClipboard("");
282 return r;
283 }
284
285 /**
286 * Shows message that the buffer can not be pasted, allowing user to clean the buffer
287 * @param helpTopic the help topic of the parent action
288 * TODO: Replace by proper HelpAwareOptionPane instead of self-made help link
289 */
290 public static void showBadBufferMessage(String helpTopic) {
291 String msg = tr("<html><p> Sorry, it is impossible to paste tags from buffer. It does not contain any JOSM object"
292 + " or suitable text. </p></html>");
293 JPanel p = new JPanel(new GridBagLayout());
294 p.add(new JLabel(msg),GBC.eop());
295 String helpUrl = HelpUtil.getHelpTopicUrl(HelpUtil.buildAbsoluteHelpTopic(helpTopic, LocaleType.DEFAULT));
296 if (helpUrl != null) {
297 p.add(new UrlLabel(helpUrl), GBC.eop());
298 }
299
300 ExtendedDialog ed = new ExtendedDialog(
301 Main.parent,
302 tr("Warning"),
303 new String[]{tr("Ok"), tr("Clear buffer")});
304
305 ed.setButtonIcons(new String[]{"ok", "dialogs/delete"});
306
307 ed.setContent(p);
308 ed.setDefaultButton(1);
309 ed.setCancelButton(1);
310 ed.setIcon(JOptionPane.WARNING_MESSAGE);
311 ed.toggleEnable("tags.paste.cleanbadbuffer");
312 ed.showDialog();
313
314 int r = ed.getValue();
315 // clean clipboard if user asked
316 if (r==2) Utils.copyToClipboard("");
317 }
318}
Note: See TracBrowser for help on using the repository browser.