Context Navigation

source: josm/trunk/test/unit/org/openstreetmap/josm/data/validation/routines/DomainValidatorTestIT.java@ 17275

Last change on this file since 17275 was 17275, checked in by Don-vip, 3 years ago

see #16567 - upgrade almost all tests to JUnit 5, except those depending on WiremockRule

See https://github.com/tomakehurst/wiremock/issues/684

Property svn:eol-style set to native

File size: 17.1 KB

Line
1	/*
2	* Licensed to the Apache Software Foundation (ASF) under one or more
3	* contributor license agreements. See the NOTICE file distributed with
4	* this work for additional information regarding copyright ownership.
5	* The ASF licenses this file to You under the Apache License, Version 2.0
6	* (the "License"); you may not use this file except in compliance with
7	* the License. You may obtain a copy of the License at
8	*
9	* http://www.apache.org/licenses/LICENSE-2.0
10	*
11	* Unless required by applicable law or agreed to in writing, software
12	* distributed under the License is distributed on an "AS IS" BASIS,
13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14	* See the License for the specific language governing permissions and
15	* limitations under the License.
16	*/
17	package org.openstreetmap.josm.data.validation.routines;
18
19	import static org.junit.jupiter.api.Assertions.assertTrue;
20	import static org.junit.jupiter.api.Assertions.fail;
21
22	import java.io.BufferedReader;
23	import java.io.Closeable;
24	import java.io.File;
25	import java.io.FileInputStream;
26	import java.io.FileOutputStream;
27	import java.io.IOException;
28	import java.io.InputStream;
29	import java.io.InputStreamReader;
30	import java.lang.reflect.Field;
31	import java.lang.reflect.Modifier;
32	import java.net.ConnectException;
33	import java.net.HttpURLConnection;
34	import java.net.IDN;
35	import java.net.URL;
36	import java.nio.charset.StandardCharsets;
37	import java.text.SimpleDateFormat;
38	import java.util.Date;
39	import java.util.HashMap;
40	import java.util.HashSet;
41	import java.util.Iterator;
42	import java.util.Locale;
43	import java.util.Map;
44	import java.util.Set;
45	import java.util.TreeMap;
46	import java.util.regex.Matcher;
47	import java.util.regex.Pattern;
48
49	import org.junit.jupiter.api.extension.RegisterExtension;
50	import org.junit.jupiter.api.Test;
51	import org.openstreetmap.josm.testutils.JOSMTestRules;
52	import org.openstreetmap.josm.tools.Logging;
53
54	import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
55
56	/**
57	* Integration tests for the DomainValidator.
58	*
59	* @version $Revision: 1723861 $
60	*/
61	class DomainValidatorTestIT {
62
63	/**
64	* Setup rule
65	*/
66	@RegisterExtension
67	@SuppressFBWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
68	public JOSMTestRules test = new JOSMTestRules().https();
69
70	/**
71	* Download and process local copy of http://data.iana.org/TLD/tlds-alpha-by-domain.txt
72	* Check if the internal TLD table is up to date
73	* Check if the internal TLD tables have any spurious entries
74	* @throws Exception if an error occurs
75	*/
76	@Test
77	void testIanaTldList() throws Exception {
78	// Check the arrays first as this affects later checks
79	// Doing this here makes it easier when updating the lists
80	boolean OK = true;
81	for (String list : new String[]{"INFRASTRUCTURE_TLDS", "COUNTRY_CODE_TLDS", "GENERIC_TLDS", "LOCAL_TLDS"}) {
82	OK &= isSortedLowerCase(list);
83	}
84	if (!OK) {
85	System.out.println("Fix arrays before retrying; cannot continue");
86	return;
87	}
88	Set<String> ianaTlds = new HashSet<>(); // keep for comparison with array contents
89	DomainValidator dv = DomainValidator.getInstance();
90	File txtFile = new File(System.getProperty("java.io.tmpdir"), "tlds-alpha-by-domain.txt");
91	long timestamp;
92	try {
93	timestamp = download(txtFile, "http://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
94	} catch (ConnectException e) {
95	Logging.error(e);
96	// Try again one more time in case of random network issue
97	timestamp = download(txtFile, "http://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
98	}
99	final File htmlFile = new File(System.getProperty("java.io.tmpdir"), "tlds-alpha-by-domain.html");
100	// N.B. sometimes the html file may be updated a day or so after the txt file
101	// if the txt file contains entries not found in the html file, try again in a day or two
102	download(htmlFile, "http://www.iana.org/domains/root/db", timestamp);
103
104	try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(txtFile), StandardCharsets.UTF_8))) {
105	String line;
106	final String header;
107	line = br.readLine(); // header
108	if (line != null && line.startsWith("# Version ")) {
109	header = line.substring(2);
110	} else {
111	throw new IOException("File does not have expected Version header");
112	}
113	final boolean generateUnicodeTlds = false; // Change this to generate Unicode TLDs as well
114
115	// Parse html page to get entries
116	Map<String, String[]> htmlInfo = getHtmlInfo(htmlFile);
117	Map<String, String> missingTLD = new TreeMap<>(); // stores entry and comments as String[]
118	Map<String, String> missingCC = new TreeMap<>();
119	while ((line = br.readLine()) != null) {
120	if (!line.startsWith("#")) {
121	final String unicodeTld; // only different from asciiTld if that was punycode
122	final String asciiTld = line.toLowerCase(Locale.ENGLISH);
123	if (line.startsWith("XN--")) {
124	unicodeTld = IDN.toUnicode(line);
125	} else {
126	unicodeTld = asciiTld;
127	}
128	if (!dv.isValidTld(asciiTld)) {
129	String[] info = htmlInfo.get(asciiTld);
130	if (info != null) {
131	String type = info[0];
132	String comment = info[1];
133	if ("country-code".equals(type)) { // Which list to use?
134	missingCC.put(asciiTld, unicodeTld + " " + comment);
135	if (generateUnicodeTlds) {
136	missingCC.put(unicodeTld, asciiTld + " " + comment);
137	}
138	} else {
139	missingTLD.put(asciiTld, unicodeTld + " " + comment);
140	if (generateUnicodeTlds) {
141	missingTLD.put(unicodeTld, asciiTld + " " + comment);
142	}
143	}
144	} else {
145	Logging.error("Expected to find HTML info for "+ asciiTld);
146	}
147	}
148	ianaTlds.add(asciiTld);
149	// Don't merge these conditions; generateUnicodeTlds is final so needs to be separate to avoid a warning
150	if (generateUnicodeTlds) {
151	if (!unicodeTld.equals(asciiTld)) {
152	ianaTlds.add(unicodeTld);
153	}
154	}
155	}
156	}
157	// List html entries not in TLD text list
158	for (String key : (new TreeMap<>(htmlInfo)).keySet()) {
159	if (!ianaTlds.contains(key)) {
160	if (isNotInRootZone(key)) {
161	Logging.info("HTML entry not yet in root zone: "+key);
162	} else {
163	Logging.warn("Expected to find text entry for html: "+key);
164	}
165	}
166	}
167	if (!missingTLD.isEmpty()) {
168	printMap(header, missingTLD, "TLD");
169	fail("missing TLD");
170	}
171	if (!missingCC.isEmpty()) {
172	printMap(header, missingCC, "CC");
173	fail("missing CC");
174	}
175	}
176	// Check if internal tables contain any additional entries
177	assertTrue(isInIanaList("INFRASTRUCTURE_TLDS", ianaTlds), String.join(System.lineSeparator(), Logging.getLastErrorAndWarnings()));
178	assertTrue(isInIanaList("COUNTRY_CODE_TLDS", ianaTlds), String.join(System.lineSeparator(), Logging.getLastErrorAndWarnings()));
179	assertTrue(isInIanaList("GENERIC_TLDS", ianaTlds), String.join(System.lineSeparator(), Logging.getLastErrorAndWarnings()));
180	// Don't check local TLDS assertTrue(isInIanaList("LOCAL_TLDS", ianaTlds));
181	}
182
183	private static void printMap(final String header, Map<String, String> map, String string) {
184	Logging.warn("Entries missing from "+ string +" List\n");
185	if (header != null) {
186	Logging.warn(" // Taken from " + header);
187	}
188	Iterator<Map.Entry<String, String>> it = map.entrySet().iterator();
189	while (it.hasNext()) {
190	Map.Entry<String, String> me = it.next();
191	Logging.warn(" \"" + me.getKey() + "\", // " + me.getValue());
192	}
193	Logging.warn(System.lineSeparator() + "Done");
194	}
195
196	@SuppressFBWarnings(value = "PERFORMANCE")
197	private static Map<String, String[]> getHtmlInfo(final File f) throws IOException {
198	final Map<String, String[]> info = new HashMap<>();
199
200	final Pattern domain = Pattern.compile(".*<a href=\"/domains/root/db/([^.]+)\\.html");
201	final Pattern type = Pattern.compile("\\s+<td>([^<]+)</td>");
202	final Pattern comment = Pattern.compile("\\s+<td>([^<]+)</td>");
203
204	try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8))) {
205	String line;
206	while ((line = br.readLine()) != null) {
207	Matcher m = domain.matcher(line);
208	if (m.lookingAt()) {
209	String dom = m.group(1);
210	String typ = "??";
211	String com = "??";
212	line = br.readLine();
213	while (line != null && line.matches("^\\s*$")) { // extra blank lines introduced
214	line = br.readLine();
215	}
216	Matcher t = type.matcher(line);
217	if (t.lookingAt()) {
218	typ = t.group(1);
219	line = br.readLine();
220	if (line != null && line.matches("\\s+<!--.*")) {
221	while (line != null && !line.matches(".-->.")) {
222	line = br.readLine();
223	}
224	line = br.readLine();
225	}
226	// Should have comment; is it wrapped?
227	while (line != null && !line.matches(".</td>.")) {
228	line += " " +br.readLine();
229	}
230	Matcher n = comment.matcher(line);
231	if (n.lookingAt()) {
232	com = n.group(1);
233	}
234	// Don't save unused entries
235	if (!com.contains("Not assigned") && !com.contains("Retired") && !typ.equals("test")) {
236	info.put(dom.toLowerCase(Locale.ENGLISH), new String[]{typ, com});
237	}
238	} else {
239	Logging.error("Unexpected type: " + line);
240	}
241	}
242	}
243	}
244	return info;
245	}
246
247	/*
248	* Download a file if it is more recent than our cached copy.
249	* Unfortunately the server does not seem to honour If-Modified-Since for the
250	* Html page, so we check if it is newer than the txt file and skip download if so
251	*/
252	private static long download(File f, String tldurl, long timestamp) throws IOException {
253	final int HOUR = 60601000; // an hour in ms
254	final long modTime;
255	// For testing purposes, don't download files more than once an hour
256	if (f.canRead()) {
257	modTime = f.lastModified();
258	if (modTime > System.currentTimeMillis()-HOUR) {
259	Logging.debug("Skipping download - found recent " + f);
260	return modTime;
261	}
262	} else {
263	modTime = 0;
264	}
265	HttpURLConnection hc = (HttpURLConnection) new URL(tldurl).openConnection();
266	if (modTime > 0) {
267	SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z"); //Sun, 06 Nov 1994 08:49:37 GMT
268	String since = sdf.format(new Date(modTime));
269	hc.addRequestProperty("If-Modified-Since", since);
270	Logging.debug("Found " + f + " with date " + since);
271	}
272	if (hc.getResponseCode() == 304) {
273	Logging.debug("Already have most recent " + tldurl);
274	} else {
275	Logging.debug("Downloading " + tldurl);
276	byte[] buff = new byte[1024];
277	try (InputStream is = hc.getInputStream();
278	FileOutputStream fos = new FileOutputStream(f)) {
279	int len;
280	while ((len = is.read(buff)) != -1) {
281	fos.write(buff, 0, len);
282	}
283	}
284	Logging.debug("Done");
285	}
286	return f.lastModified();
287	}
288
289	/**
290	* Check whether the domain is in the root zone currently.
291	* Reads the URL http://www.iana.org/domains/root/db/domain.html
292	* (using a local disk cache)
293	* and checks for the string "This domain is not present in the root zone at this time."
294	* @param domain the domain to check
295	* @return true if the string is found
296	*/
297	private static boolean isNotInRootZone(String domain) {
298	String tldurl = "http://www.iana.org/domains/root/db/" + domain + ".html";
299	BufferedReader in = null;
300	try {
301	File rootCheck = new File(System.getProperty("java.io.tmpdir"), "tld_" + domain + ".html");
302	download(rootCheck, tldurl, 0L);
303	in = new BufferedReader(new InputStreamReader(new FileInputStream(rootCheck), StandardCharsets.UTF_8));
304	String inputLine;
305	while ((inputLine = in.readLine()) != null) {
306	if (inputLine.contains("This domain is not present in the root zone at this time.")) {
307	return true;
308	}
309	}
310	in.close();
311	} catch (IOException e) {
312	e.printStackTrace();
313	} finally {
314	closeQuietly(in);
315	}
316	return false;
317	}
318
319	private static void closeQuietly(Closeable in) {
320	if (in != null) {
321	try {
322	in.close();
323	} catch (IOException e) {
324	e.printStackTrace();
325	}
326	}
327	}
328
329	// isInIanaList and isSorted are split into two methods.
330	// If/when access to the arrays is possible without reflection, the intermediate
331	// methods can be dropped
332	private static boolean isInIanaList(String arrayName, Set<String> ianaTlds) throws Exception {
333	Field f = DomainValidator.class.getDeclaredField(arrayName);
334	final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
335	if (isPrivate) {
336	f.setAccessible(true);
337	}
338	String[] array = (String[]) f.get(null);
339	try {
340	return isInIanaList(arrayName, array, ianaTlds);
341	} finally {
342	if (isPrivate) {
343	f.setAccessible(false);
344	}
345	}
346	}
347
348	private static boolean isInIanaList(String name, String[] array, Set<String> ianaTlds) {
349	boolean ok = true;
350	for (int i = 0; i < array.length; i++) {
351	if (!ianaTlds.contains(array[i])) {
352	Logging.error(name + " contains unexpected value: " + array[i]);
353	ok = false;
354	}
355	}
356	return ok;
357	}
358
359	private static boolean isSortedLowerCase(String arrayName) throws Exception {
360	Field f = DomainValidator.class.getDeclaredField(arrayName);
361	final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
362	if (isPrivate) {
363	f.setAccessible(true);
364	}
365	String[] array = (String[]) f.get(null);
366	try {
367	return isSortedLowerCase(arrayName, array);
368	} finally {
369	if (isPrivate) {
370	f.setAccessible(false);
371	}
372	}
373	}
374
375	private static boolean isLowerCase(String string) {
376	return string.equals(string.toLowerCase(Locale.ENGLISH));
377	}
378
379	// Check if an array is strictly sorted - and lowerCase
380	private static boolean isSortedLowerCase(String name, String[] array) {
381	boolean sorted = true;
382	boolean strictlySorted = true;
383	final int length = array.length;
384	boolean lowerCase = isLowerCase(array[length-1]); // Check the last entry
385	for (int i = 0; i < length-1; i++) { // compare all but last entry with next
386	final String entry = array[i];
387	final String nextEntry = array[i+1];
388	final int cmp = entry.compareTo(nextEntry);
389	if (cmp > 0) { // out of order
390	Logging.error("Out of order entry: " + entry + " < " + nextEntry + " in " + name);
391	sorted = false;
392	} else if (cmp == 0) {
393	strictlySorted = false;
394	Logging.error("Duplicated entry: " + entry + " in " + name);
395	}
396	if (!isLowerCase(entry)) {
397	Logging.error("Non lowerCase entry: " + entry + " in " + name);
398	lowerCase = false;
399	}
400	}
401	return sorted && strictlySorted && lowerCase;
402	}
403	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: