1 | // License: GPL. For details, see LICENSE file.
|
---|
2 | package org.openstreetmap.josm.tools;
|
---|
3 |
|
---|
4 | import java.util.Arrays;
|
---|
5 | import java.util.HashMap;
|
---|
6 | import java.util.Map;
|
---|
7 |
|
---|
8 | /**
|
---|
9 | * A helper class that analyzes the text and attempts to parse tags from it
|
---|
10 | * @since 13544 (extracted from {@link TextTagParser})
|
---|
11 | */
|
---|
12 | public class TextAnalyzer {
|
---|
13 | private boolean quotesStarted;
|
---|
14 | private boolean esc;
|
---|
15 | private final StringBuilder s = new StringBuilder(200);
|
---|
16 | private String valueStops = "\n\r\t";
|
---|
17 | private int pos;
|
---|
18 | private final String data;
|
---|
19 | private final int n;
|
---|
20 |
|
---|
21 | /**
|
---|
22 | * Create a new {@link TextAnalyzer}
|
---|
23 | * @param text The text to parse
|
---|
24 | */
|
---|
25 | public TextAnalyzer(String text) {
|
---|
26 | pos = 0;
|
---|
27 | data = Utils.strip(text);
|
---|
28 | n = data.length();
|
---|
29 | // fix #1604: allow space characters as value stops for single-line input only
|
---|
30 | if (data.indexOf('\r') == -1 && data.indexOf('\n') == -1) {
|
---|
31 | valueStops += " ";
|
---|
32 | }
|
---|
33 | }
|
---|
34 |
|
---|
35 | /**
|
---|
36 | * Read tags from "Free format"
|
---|
37 | * @return map of tags
|
---|
38 | */
|
---|
39 | public Map<String, String> getFreeParsedTags() {
|
---|
40 | String k, v;
|
---|
41 | Map<String, String> tags = new HashMap<>();
|
---|
42 |
|
---|
43 | while (true) {
|
---|
44 | skipEmpty();
|
---|
45 | if (pos == n) {
|
---|
46 | break;
|
---|
47 | }
|
---|
48 | k = parseString("\n\r\t= ");
|
---|
49 | if (pos == n) {
|
---|
50 | tags.clear();
|
---|
51 | break;
|
---|
52 | }
|
---|
53 | skipSign();
|
---|
54 | if (pos == n) {
|
---|
55 | tags.clear();
|
---|
56 | break;
|
---|
57 | }
|
---|
58 | v = parseString(valueStops);
|
---|
59 | tags.put(k, v);
|
---|
60 | }
|
---|
61 | return tags;
|
---|
62 | }
|
---|
63 |
|
---|
64 | /**
|
---|
65 | * Parses current text to extract a key or value depending on given stop characters.
|
---|
66 | * @param stopChars Parsing will stop when one character of this string is found
|
---|
67 | * @return key or value extracted from current text
|
---|
68 | */
|
---|
69 | public String parseString(String stopChars) {
|
---|
70 | char[] stop = stopChars.toCharArray();
|
---|
71 | Arrays.sort(stop);
|
---|
72 | char c;
|
---|
73 | while (pos < n) {
|
---|
74 | c = data.charAt(pos);
|
---|
75 | if (esc) {
|
---|
76 | esc = false;
|
---|
77 | s.append(c); // \" \\
|
---|
78 | } else if (c == '\\') {
|
---|
79 | esc = true;
|
---|
80 | } else if (c == '\"' && !quotesStarted) { // opening "
|
---|
81 | if (!s.toString().trim().isEmpty()) { // we had ||some text"||
|
---|
82 | s.append(c); // just add ", not open
|
---|
83 | } else {
|
---|
84 | s.delete(0, s.length()); // forget that empty characthers and start reading "....
|
---|
85 | quotesStarted = true;
|
---|
86 | }
|
---|
87 | } else if (c == '\"' && quotesStarted) { // closing "
|
---|
88 | quotesStarted = false;
|
---|
89 | pos++;
|
---|
90 | break;
|
---|
91 | } else if (!quotesStarted && (Arrays.binarySearch(stop, c) >= 0)) {
|
---|
92 | // stop-symbol found
|
---|
93 | pos++;
|
---|
94 | break;
|
---|
95 | } else {
|
---|
96 | // skip non-printable characters
|
---|
97 | if (c >= 32) s.append(c);
|
---|
98 | }
|
---|
99 | pos++;
|
---|
100 | }
|
---|
101 |
|
---|
102 | String res = s.toString();
|
---|
103 | s.delete(0, s.length());
|
---|
104 | return res.trim();
|
---|
105 | }
|
---|
106 |
|
---|
107 | private void skipSign() {
|
---|
108 | char c;
|
---|
109 | boolean signFound = false;
|
---|
110 | while (pos < n) {
|
---|
111 | c = data.charAt(pos);
|
---|
112 | if (c == '\t' || c == '\n' || c == ' ') {
|
---|
113 | pos++;
|
---|
114 | } else if (c == '=') {
|
---|
115 | if (signFound) break; // a = =qwerty means "a"="=qwerty"
|
---|
116 | signFound = true;
|
---|
117 | pos++;
|
---|
118 | } else {
|
---|
119 | break;
|
---|
120 | }
|
---|
121 | }
|
---|
122 | }
|
---|
123 |
|
---|
124 | private void skipEmpty() {
|
---|
125 | char c;
|
---|
126 | while (pos < n) {
|
---|
127 | c = data.charAt(pos);
|
---|
128 | if (c == '\t' || c == '\n' || c == '\r' || c == ' ') {
|
---|
129 | pos++;
|
---|
130 | } else {
|
---|
131 | break;
|
---|
132 | }
|
---|
133 | }
|
---|
134 | }
|
---|
135 | }
|
---|