source: josm/trunk/test/unit/org/openstreetmap/josm/data/validation/routines/DomainValidatorTestIT.java@ 17275

Last change on this file since 17275 was 17275, checked in by Don-vip, 3 years ago

see #16567 - upgrade almost all tests to JUnit 5, except those depending on WiremockRule

See https://github.com/tomakehurst/wiremock/issues/684

  • Property svn:eol-style set to native
File size: 17.1 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package org.openstreetmap.josm.data.validation.routines;
18
19import static org.junit.jupiter.api.Assertions.assertTrue;
20import static org.junit.jupiter.api.Assertions.fail;
21
22import java.io.BufferedReader;
23import java.io.Closeable;
24import java.io.File;
25import java.io.FileInputStream;
26import java.io.FileOutputStream;
27import java.io.IOException;
28import java.io.InputStream;
29import java.io.InputStreamReader;
30import java.lang.reflect.Field;
31import java.lang.reflect.Modifier;
32import java.net.ConnectException;
33import java.net.HttpURLConnection;
34import java.net.IDN;
35import java.net.URL;
36import java.nio.charset.StandardCharsets;
37import java.text.SimpleDateFormat;
38import java.util.Date;
39import java.util.HashMap;
40import java.util.HashSet;
41import java.util.Iterator;
42import java.util.Locale;
43import java.util.Map;
44import java.util.Set;
45import java.util.TreeMap;
46import java.util.regex.Matcher;
47import java.util.regex.Pattern;
48
49import org.junit.jupiter.api.extension.RegisterExtension;
50import org.junit.jupiter.api.Test;
51import org.openstreetmap.josm.testutils.JOSMTestRules;
52import org.openstreetmap.josm.tools.Logging;
53
54import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
55
56/**
57 * Integration tests for the DomainValidator.
58 *
59 * @version $Revision: 1723861 $
60 */
61class DomainValidatorTestIT {
62
63 /**
 * Setup rule: the JOSM test environment, registered as a JUnit 5 extension and configured
 * through {@code https()} (presumably enabling the HTTPS setup needed by the tests - confirm
 * against {@code JOSMTestRules}).
 * <p>
 * The field is not read anywhere in this class, hence the suppressed
 * {@code URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD} warning.
 */
66 @RegisterExtension
67 @SuppressFBWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
68 public JOSMTestRules test = new JOSMTestRules().https();
69
70 /**
71 * Download and process local copy of http://data.iana.org/TLD/tlds-alpha-by-domain.txt
72 * Check if the internal TLD table is up to date
73 * Check if the internal TLD tables have any spurious entries
74 * @throws Exception if an error occurs
75 */
76 @Test
77 void testIanaTldList() throws Exception {
78 // Check the arrays first as this affects later checks
79 // Doing this here makes it easier when updating the lists
80 boolean OK = true;
81 for (String list : new String[]{"INFRASTRUCTURE_TLDS", "COUNTRY_CODE_TLDS", "GENERIC_TLDS", "LOCAL_TLDS"}) {
82 OK &= isSortedLowerCase(list);
83 }
84 if (!OK) {
85 System.out.println("Fix arrays before retrying; cannot continue");
86 return;
87 }
88 Set<String> ianaTlds = new HashSet<>(); // keep for comparison with array contents
89 DomainValidator dv = DomainValidator.getInstance();
90 File txtFile = new File(System.getProperty("java.io.tmpdir"), "tlds-alpha-by-domain.txt");
91 long timestamp;
92 try {
93 timestamp = download(txtFile, "http://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
94 } catch (ConnectException e) {
95 Logging.error(e);
96 // Try again one more time in case of random network issue
97 timestamp = download(txtFile, "http://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
98 }
99 final File htmlFile = new File(System.getProperty("java.io.tmpdir"), "tlds-alpha-by-domain.html");
100 // N.B. sometimes the html file may be updated a day or so after the txt file
101 // if the txt file contains entries not found in the html file, try again in a day or two
102 download(htmlFile, "http://www.iana.org/domains/root/db", timestamp);
103
104 try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(txtFile), StandardCharsets.UTF_8))) {
105 String line;
106 final String header;
107 line = br.readLine(); // header
108 if (line != null && line.startsWith("# Version ")) {
109 header = line.substring(2);
110 } else {
111 throw new IOException("File does not have expected Version header");
112 }
113 final boolean generateUnicodeTlds = false; // Change this to generate Unicode TLDs as well
114
115 // Parse html page to get entries
116 Map<String, String[]> htmlInfo = getHtmlInfo(htmlFile);
117 Map<String, String> missingTLD = new TreeMap<>(); // stores entry and comments as String[]
118 Map<String, String> missingCC = new TreeMap<>();
119 while ((line = br.readLine()) != null) {
120 if (!line.startsWith("#")) {
121 final String unicodeTld; // only different from asciiTld if that was punycode
122 final String asciiTld = line.toLowerCase(Locale.ENGLISH);
123 if (line.startsWith("XN--")) {
124 unicodeTld = IDN.toUnicode(line);
125 } else {
126 unicodeTld = asciiTld;
127 }
128 if (!dv.isValidTld(asciiTld)) {
129 String[] info = htmlInfo.get(asciiTld);
130 if (info != null) {
131 String type = info[0];
132 String comment = info[1];
133 if ("country-code".equals(type)) { // Which list to use?
134 missingCC.put(asciiTld, unicodeTld + " " + comment);
135 if (generateUnicodeTlds) {
136 missingCC.put(unicodeTld, asciiTld + " " + comment);
137 }
138 } else {
139 missingTLD.put(asciiTld, unicodeTld + " " + comment);
140 if (generateUnicodeTlds) {
141 missingTLD.put(unicodeTld, asciiTld + " " + comment);
142 }
143 }
144 } else {
145 Logging.error("Expected to find HTML info for "+ asciiTld);
146 }
147 }
148 ianaTlds.add(asciiTld);
149 // Don't merge these conditions; generateUnicodeTlds is final so needs to be separate to avoid a warning
150 if (generateUnicodeTlds) {
151 if (!unicodeTld.equals(asciiTld)) {
152 ianaTlds.add(unicodeTld);
153 }
154 }
155 }
156 }
157 // List html entries not in TLD text list
158 for (String key : (new TreeMap<>(htmlInfo)).keySet()) {
159 if (!ianaTlds.contains(key)) {
160 if (isNotInRootZone(key)) {
161 Logging.info("HTML entry not yet in root zone: "+key);
162 } else {
163 Logging.warn("Expected to find text entry for html: "+key);
164 }
165 }
166 }
167 if (!missingTLD.isEmpty()) {
168 printMap(header, missingTLD, "TLD");
169 fail("missing TLD");
170 }
171 if (!missingCC.isEmpty()) {
172 printMap(header, missingCC, "CC");
173 fail("missing CC");
174 }
175 }
176 // Check if internal tables contain any additional entries
177 assertTrue(isInIanaList("INFRASTRUCTURE_TLDS", ianaTlds), String.join(System.lineSeparator(), Logging.getLastErrorAndWarnings()));
178 assertTrue(isInIanaList("COUNTRY_CODE_TLDS", ianaTlds), String.join(System.lineSeparator(), Logging.getLastErrorAndWarnings()));
179 assertTrue(isInIanaList("GENERIC_TLDS", ianaTlds), String.join(System.lineSeparator(), Logging.getLastErrorAndWarnings()));
180 // Don't check local TLDS assertTrue(isInIanaList("LOCAL_TLDS", ianaTlds));
181 }
182
183 private static void printMap(final String header, Map<String, String> map, String string) {
184 Logging.warn("Entries missing from "+ string +" List\n");
185 if (header != null) {
186 Logging.warn(" // Taken from " + header);
187 }
188 Iterator<Map.Entry<String, String>> it = map.entrySet().iterator();
189 while (it.hasNext()) {
190 Map.Entry<String, String> me = it.next();
191 Logging.warn(" \"" + me.getKey() + "\", // " + me.getValue());
192 }
193 Logging.warn(System.lineSeparator() + "Done");
194 }
195
196 @SuppressFBWarnings(value = "PERFORMANCE")
197 private static Map<String, String[]> getHtmlInfo(final File f) throws IOException {
198 final Map<String, String[]> info = new HashMap<>();
199
200 final Pattern domain = Pattern.compile(".*<a href=\"/domains/root/db/([^.]+)\\.html");
201 final Pattern type = Pattern.compile("\\s+<td>([^<]+)</td>");
202 final Pattern comment = Pattern.compile("\\s+<td>([^<]+)</td>");
203
204 try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8))) {
205 String line;
206 while ((line = br.readLine()) != null) {
207 Matcher m = domain.matcher(line);
208 if (m.lookingAt()) {
209 String dom = m.group(1);
210 String typ = "??";
211 String com = "??";
212 line = br.readLine();
213 while (line != null && line.matches("^\\s*$")) { // extra blank lines introduced
214 line = br.readLine();
215 }
216 Matcher t = type.matcher(line);
217 if (t.lookingAt()) {
218 typ = t.group(1);
219 line = br.readLine();
220 if (line != null && line.matches("\\s+<!--.*")) {
221 while (line != null && !line.matches(".*-->.*")) {
222 line = br.readLine();
223 }
224 line = br.readLine();
225 }
226 // Should have comment; is it wrapped?
227 while (line != null && !line.matches(".*</td>.*")) {
228 line += " " +br.readLine();
229 }
230 Matcher n = comment.matcher(line);
231 if (n.lookingAt()) {
232 com = n.group(1);
233 }
234 // Don't save unused entries
235 if (!com.contains("Not assigned") && !com.contains("Retired") && !typ.equals("test")) {
236 info.put(dom.toLowerCase(Locale.ENGLISH), new String[]{typ, com});
237 }
238 } else {
239 Logging.error("Unexpected type: " + line);
240 }
241 }
242 }
243 }
244 return info;
245 }
246
247 /*
248 * Download a file if it is more recent than our cached copy.
249 * Unfortunately the server does not seem to honour If-Modified-Since for the
250 * Html page, so we check if it is newer than the txt file and skip download if so
251 */
252 private static long download(File f, String tldurl, long timestamp) throws IOException {
253 final int HOUR = 60*60*1000; // an hour in ms
254 final long modTime;
255 // For testing purposes, don't download files more than once an hour
256 if (f.canRead()) {
257 modTime = f.lastModified();
258 if (modTime > System.currentTimeMillis()-HOUR) {
259 Logging.debug("Skipping download - found recent " + f);
260 return modTime;
261 }
262 } else {
263 modTime = 0;
264 }
265 HttpURLConnection hc = (HttpURLConnection) new URL(tldurl).openConnection();
266 if (modTime > 0) {
267 SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z"); //Sun, 06 Nov 1994 08:49:37 GMT
268 String since = sdf.format(new Date(modTime));
269 hc.addRequestProperty("If-Modified-Since", since);
270 Logging.debug("Found " + f + " with date " + since);
271 }
272 if (hc.getResponseCode() == 304) {
273 Logging.debug("Already have most recent " + tldurl);
274 } else {
275 Logging.debug("Downloading " + tldurl);
276 byte[] buff = new byte[1024];
277 try (InputStream is = hc.getInputStream();
278 FileOutputStream fos = new FileOutputStream(f)) {
279 int len;
280 while ((len = is.read(buff)) != -1) {
281 fos.write(buff, 0, len);
282 }
283 }
284 Logging.debug("Done");
285 }
286 return f.lastModified();
287 }
288
289 /**
290 * Check whether the domain is in the root zone currently.
291 * Reads the URL http://www.iana.org/domains/root/db/*domain*.html
292 * (using a local disk cache)
293 * and checks for the string "This domain is not present in the root zone at this time."
294 * @param domain the domain to check
295 * @return true if the string is found
296 */
297 private static boolean isNotInRootZone(String domain) {
298 String tldurl = "http://www.iana.org/domains/root/db/" + domain + ".html";
299 BufferedReader in = null;
300 try {
301 File rootCheck = new File(System.getProperty("java.io.tmpdir"), "tld_" + domain + ".html");
302 download(rootCheck, tldurl, 0L);
303 in = new BufferedReader(new InputStreamReader(new FileInputStream(rootCheck), StandardCharsets.UTF_8));
304 String inputLine;
305 while ((inputLine = in.readLine()) != null) {
306 if (inputLine.contains("This domain is not present in the root zone at this time.")) {
307 return true;
308 }
309 }
310 in.close();
311 } catch (IOException e) {
312 e.printStackTrace();
313 } finally {
314 closeQuietly(in);
315 }
316 return false;
317 }
318
319 private static void closeQuietly(Closeable in) {
320 if (in != null) {
321 try {
322 in.close();
323 } catch (IOException e) {
324 e.printStackTrace();
325 }
326 }
327 }
328
329 // isInIanaList and isSorted are split into two methods.
330 // If/when access to the arrays is possible without reflection, the intermediate
331 // methods can be dropped
332 private static boolean isInIanaList(String arrayName, Set<String> ianaTlds) throws Exception {
333 Field f = DomainValidator.class.getDeclaredField(arrayName);
334 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
335 if (isPrivate) {
336 f.setAccessible(true);
337 }
338 String[] array = (String[]) f.get(null);
339 try {
340 return isInIanaList(arrayName, array, ianaTlds);
341 } finally {
342 if (isPrivate) {
343 f.setAccessible(false);
344 }
345 }
346 }
347
348 private static boolean isInIanaList(String name, String[] array, Set<String> ianaTlds) {
349 boolean ok = true;
350 for (int i = 0; i < array.length; i++) {
351 if (!ianaTlds.contains(array[i])) {
352 Logging.error(name + " contains unexpected value: " + array[i]);
353 ok = false;
354 }
355 }
356 return ok;
357 }
358
359 private static boolean isSortedLowerCase(String arrayName) throws Exception {
360 Field f = DomainValidator.class.getDeclaredField(arrayName);
361 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
362 if (isPrivate) {
363 f.setAccessible(true);
364 }
365 String[] array = (String[]) f.get(null);
366 try {
367 return isSortedLowerCase(arrayName, array);
368 } finally {
369 if (isPrivate) {
370 f.setAccessible(false);
371 }
372 }
373 }
374
375 private static boolean isLowerCase(String string) {
376 return string.equals(string.toLowerCase(Locale.ENGLISH));
377 }
378
379 // Check if an array is strictly sorted - and lowerCase
380 private static boolean isSortedLowerCase(String name, String[] array) {
381 boolean sorted = true;
382 boolean strictlySorted = true;
383 final int length = array.length;
384 boolean lowerCase = isLowerCase(array[length-1]); // Check the last entry
385 for (int i = 0; i < length-1; i++) { // compare all but last entry with next
386 final String entry = array[i];
387 final String nextEntry = array[i+1];
388 final int cmp = entry.compareTo(nextEntry);
389 if (cmp > 0) { // out of order
390 Logging.error("Out of order entry: " + entry + " < " + nextEntry + " in " + name);
391 sorted = false;
392 } else if (cmp == 0) {
393 strictlySorted = false;
394 Logging.error("Duplicated entry: " + entry + " in " + name);
395 }
396 if (!isLowerCase(entry)) {
397 Logging.error("Non lowerCase entry: " + entry + " in " + name);
398 lowerCase = false;
399 }
400 }
401 return sorted && strictlySorted && lowerCase;
402 }
403}
Note: See TracBrowser for help on using the repository browser.