source: josm/trunk/src/org/openstreetmap/josm/tools/TextTagParser.java@ 5755

Last change on this file since 5755 was 5755, checked in by akks, 11 years ago

Fix incorrect tag pasting from tab-delimited text, see #8384

File size: 10.1 KB
Line 
1package org.openstreetmap.josm.tools;
2
3import java.util.HashMap;
4import java.util.Map;
5import java.util.regex.Matcher;
6import java.util.regex.Pattern;
7import javax.swing.JOptionPane;
8import org.openstreetmap.josm.Main;
9import org.openstreetmap.josm.gui.ExtendedDialog;
10import org.openstreetmap.josm.io.XmlWriter;
11
12import static org.openstreetmap.josm.tools.I18n.tr;
13import static org.openstreetmap.josm.tools.I18n.trn;
14
15/**
16 * Class that helps to parse tags from arbitrary text
17 */
18public class TextTagParser {
19
20 // properties need JOSM restart to apply, modified rarely enough
21 protected static final int MAX_KEY_LENGTH = Main.pref.getInteger("tags.paste.max-key-length", 50);
22 protected static final int MAX_KEY_COUNT = Main.pref.getInteger("tags.paste.max-key-count", 30);
23 protected static final String KEY_PATTERN = Main.pref.get("tags.paste.tag-pattern", "[0-9a-zA-Z:_]*");
24 protected static final int MAX_VALUE_LENGTH = 255;
25
26 public static class TextAnalyzer {
27 int start = 0;
28 boolean keyFound = false;
29 boolean quotesStarted = false;
30 boolean esc = false;
31 StringBuilder s = new StringBuilder(200);
32 int pos;
33 String data;
34 int n;
35 boolean notFound;
36
37 public TextAnalyzer(String text) {
38 pos = 0;
39 data = text;
40 n = data.length();
41 }
42
43 /**
44 * Read tags from "Free format"
45 */
46 Map<String, String> getFreeParsedTags() {
47 String k, v;
48 Map<String, String> tags = new HashMap<String,String>();
49
50 while (true) {
51 skipEmpty();
52 if (pos == n) { break; }
53 k = parseString(true);
54 if (pos == n) { tags.clear(); break; }
55 skipSign();
56 if (pos == n) { tags.clear(); break; }
57 v = parseString(false);
58 tags.put(k, v);
59 }
60 return tags;
61 }
62
63 private String parseString(boolean stopOnEquals) {
64 char c;
65 while (pos < n) {
66 c = data.charAt(pos);
67 if (esc) {
68 esc = false;
69 s.append(c); // \" \\
70 } else if (c == '\\') {
71 esc = true;
72 } else if (c == '\"' && !quotesStarted) { // opening "
73 if (s.toString().trim().length()>0) { // we had ||some text"||
74 s.append(c); // just add ", not open
75 } else {
76 s.delete(0, s.length()); // forget that empty characthers and start reading "....
77 quotesStarted = true;
78 }
79 } else if (c == '\"' && quotesStarted) { // closing "
80 quotesStarted = false;
81 pos++;
82 break;
83 } else if (!quotesStarted && (c=='\n'|| c=='\t'|| c==' ' || c=='\r'
84 || (c=='=' && stopOnEquals))) { // stop-symbols
85 pos++;
86 break;
87 } else {
88 // skip non-printable characters
89 if(c>=32) s.append(c);
90 }
91 pos++;
92 }
93
94 String res = s.toString();
95 s.delete(0, s.length());
96 return res.trim();
97 }
98
99 private void skipSign() {
100 char c;
101 boolean signFound = false;;
102 while (pos < n) {
103 c = data.charAt(pos);
104 if (c == '\t' || c == '\n' || c == ' ') {
105 pos++;
106 } else if (c== '=') {
107 if (signFound) break; // a = =qwerty means "a"="=qwerty"
108 signFound = true;
109 pos++;
110 } else {
111 break;
112 }
113 }
114 }
115
116 private void skipEmpty() {
117 char c;
118 while (pos < n) {
119 c = data.charAt(pos);
120 if (c == '\t' || c == '\n' || c == '\r' || c == ' ' ) {
121 pos++;
122 } else {
123 break;
124 }
125 }
126 }
127 }
128
129 private static String unescape(String k) {
130 if(! (k.startsWith("\"") && k.endsWith("\"")) ) {
131 if (k.contains("=")) {
132 // '=' not in quotes will be treated as an error!
133 return null;
134 } else {
135 return k;
136 }
137 }
138 String text = k.substring(1,k.length()-1);
139 return (new TextAnalyzer(text)).parseString(false);
140 }
141
142 /**
143 * Try to find tag-value pairs in given text
144 * @param text - text in which tags are looked for
145 * @param splitRegex - text is splitted into parts with this delimiter
146 * @param tagRegex - each part is matched against this regex
147 * @param unescapeTextInQuotes - if true, matched tag and value will be analyzed more thoroughly
148 */
149 public static Map<String, String> readTagsByRegexp(String text, String splitRegex, String tagRegex, boolean unescapeTextInQuotes) {
150 String lines[] = text.split(splitRegex);
151 Pattern p = Pattern.compile(tagRegex);
152 Map<String, String> tags = new HashMap<String,String>();
153 String k=null, v=null;
154 for (String line: lines) {
155 if (line.trim().isEmpty()) continue; // skip empty lines
156 Matcher m = p.matcher(line);
157 if (m.matches()) {
158 k=m.group(1).trim(); v=m.group(2).trim();
159 if (unescapeTextInQuotes) {
160 k = unescape(k);
161 v = unescape(v);
162 if (k==null || v==null) return null;
163 }
164 tags.put(k,v);
165 } else {
166 return null;
167 }
168 }
169 if (!tags.isEmpty()) {
170 return tags;
171 } else {
172 return null;
173 }
174 }
175
176 public static Map<String,String> getValidatedTagsFromText(String buf) {
177 Map<String,String> tags = readTagsFromText(buf);
178 return validateTags(tags) ? tags : null;
179 }
180
181 /**
182 * Apply different methods to extract tag-value pairs from arbitrary text
183 * @param buf
184 * @return null if no format is suitable
185 */
186
187 public static Map<String,String> readTagsFromText(String buf) {
188 Map<String,String> tags;
189
190 // Format
191 // tag1\tval1\ntag2\tval2\n
192 tags = readTagsByRegexp(buf, "[\\r\\n]+", "(.*?)\\t(.*?)", false);
193 // try "tag\tvalue\n" format
194 if (tags!=null) return tags;
195
196 // Format
197 // a=b \n c=d \n "a b"=hello
198 // SORRY: "a=b" = c is not supported fror now, only first = will be considered
199 // a = "b=c" is OK
200 // a = b=c - this method of parsing fails intentionally
201 tags = readTagsByRegexp(buf, "[\\n\\t\\r]+", "(.*?)=(.*?)", true);
202 // try format t1=v1\n t2=v2\n ...
203 if (tags!=null) return tags;
204
205 // JSON-format
206 String bufJson = buf.trim();
207 // trim { }, if there are any
208 if (bufJson.startsWith("{") && bufJson.endsWith("}") ) bufJson = bufJson.substring(1,bufJson.length()-1);
209 tags = readTagsByRegexp(bufJson, "[\\s]*,[\\s]*",
210 "[\\s]*(\\\".*?[^\\\\]\\\")"+"[\\s]*:[\\s]*"+"(\\\".*?[^\\\\]\\\")[\\s]*", true);
211 if (tags!=null) return tags;
212
213 // Free format
214 // a 1 "b" 2 c=3 d 4 e "5"
215 TextAnalyzer parser = new TextAnalyzer(buf);
216 tags = parser.getFreeParsedTags();
217 return tags;
218 }
219
220 /**
221 * Check tags for correctness and display warnings if needed
222 * @param tags - map key->value to check
223 * @return true if user decision was "OK"
224 */
225 public static boolean validateTags(Map<String, String> tags) {
226 String value;
227 int r;
228 int s = tags.size();
229 if (s > MAX_KEY_COUNT) {
230 // Use trn() even if for english it makes no sense, as s > 30
231 r=warning(trn("There was {0} tag found in the buffer, it is suspicious!",
232 "There were {0} tags found in the buffer, it is suspicious!", s,
233 s), "", "toomanytags");
234 if (r==2) return false; if (r==3) return true;
235 }
236 for (String key: tags.keySet()) {
237 value = tags.get(key);
238 if (key.length() > MAX_KEY_LENGTH) {
239 r = warning(tr("Key is too long (max {0} characters):", MAX_KEY_LENGTH), key+"="+value, "keytoolong");
240 if (r==2) return false; if (r==3) return true;
241 }
242 if (!key.matches(KEY_PATTERN)) {
243 r = warning(tr("Suspicious characters in key:"), key, "keydoesnotmatch");
244 if (r==2) return false; if (r==3) return true;
245 }
246 if (value.length() > MAX_VALUE_LENGTH) {
247 r = warning(tr("Value is too long (max {0} characters):", MAX_VALUE_LENGTH), value, "valuetoolong");
248 if (r==2) return false; if (r==3) return true;
249 }
250 }
251 return true;
252 }
253
254 private static int warning(String text, String data, String code) {
255 ExtendedDialog ed = new ExtendedDialog(
256 Main.parent,
257 tr("Do you want to paste these tags?"),
258 new String[]{tr("Ok"), tr("Cancel"), tr("Ignore warnings")});
259 ed.setButtonIcons(new String[]{"ok.png", "cancel.png", "pastetags.png"});
260 ed.setContent("<html><b>"+text + "</b><br/><br/><div width=\"300px\">"+XmlWriter.encode(data,true)+"</html>");
261 ed.setDefaultButton(2);
262 ed.setCancelButton(2);
263 ed.setIcon(JOptionPane.WARNING_MESSAGE);
264 ed.toggleEnable(code);
265 ed.showDialog();
266 Object o = ed.getValue();
267 if (o instanceof Integer)
268 return ((Integer)o).intValue();
269 else
270 return 2;
271 }
272}
Note: See TracBrowser for help on using the repository browser.