Context Navigation

source: josm/trunk/test/unit/org/openstreetmap/josm/data/validation/routines/DomainValidatorTestIT.java@ 10756

Last change on this file since 10756 was 10756, checked in by Don-vip, 8 years ago
add robustness to DomainValidatorTestIT
Property svn:eol-style set to `native`
File size: 17.1 KB

Line
1	/*
2	* Licensed to the Apache Software Foundation (ASF) under one or more
3	* contributor license agreements. See the NOTICE file distributed with
4	* this work for additional information regarding copyright ownership.
5	* The ASF licenses this file to You under the Apache License, Version 2.0
6	* (the "License"); you may not use this file except in compliance with
7	* the License. You may obtain a copy of the License at
8	*
9	* http://www.apache.org/licenses/LICENSE-2.0
10	*
11	* Unless required by applicable law or agreed to in writing, software
12	* distributed under the License is distributed on an "AS IS" BASIS,
13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14	* See the License for the specific language governing permissions and
15	* limitations under the License.
16	*/
17	package org.openstreetmap.josm.data.validation.routines;
18
19	import static org.junit.Assert.assertTrue;
20	import static org.junit.Assert.fail;
21
22	import java.io.BufferedReader;
23	import java.io.Closeable;
24	import java.io.File;
25	import java.io.FileInputStream;
26	import java.io.FileOutputStream;
27	import java.io.IOException;
28	import java.io.InputStream;
29	import java.io.InputStreamReader;
30	import java.lang.reflect.Field;
31	import java.lang.reflect.Modifier;
32	import java.net.ConnectException;
33	import java.net.HttpURLConnection;
34	import java.net.IDN;
35	import java.net.URL;
36	import java.nio.charset.StandardCharsets;
37	import java.text.SimpleDateFormat;
38	import java.util.Date;
39	import java.util.HashMap;
40	import java.util.HashSet;
41	import java.util.Iterator;
42	import java.util.Locale;
43	import java.util.Map;
44	import java.util.Set;
45	import java.util.TreeMap;
46	import java.util.regex.Matcher;
47	import java.util.regex.Pattern;
48
49	import org.junit.Test;
50	import org.openstreetmap.josm.Main;
51
52	import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
53
54	/**
55	* Integration tests for the DomainValidator.
56	*
57	* @version $Revision: 1723861 $
58	*/
59	public class DomainValidatorTestIT {
60
61	/**
62	* Download and process local copy of http://data.iana.org/TLD/tlds-alpha-by-domain.txt
63	* Check if the internal TLD table is up to date
64	* Check if the internal TLD tables have any spurious entries
65	* @throws Exception if an error occurs
66	*/
67	@Test
68	public void testIanaTldList() throws Exception {
69	// Check the arrays first as this affects later checks
70	// Doing this here makes it easier when updating the lists
71	boolean OK = true;
72	for (String list : new String[]{"INFRASTRUCTURE_TLDS", "COUNTRY_CODE_TLDS", "GENERIC_TLDS", "LOCAL_TLDS"}) {
73	OK &= isSortedLowerCase(list);
74	}
75	if (!OK) {
76	System.out.println("Fix arrays before retrying; cannot continue");
77	return;
78	}
79	Set<String> ianaTlds = new HashSet<>(); // keep for comparison with array contents
80	DomainValidator dv = DomainValidator.getInstance();
81	File txtFile = new File(System.getProperty("java.io.tmpdir"), "tlds-alpha-by-domain.txt");
82	long timestamp;
83	try {
84	timestamp = download(txtFile, "http://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
85	} catch (ConnectException e) {
86	Main.error(e);
87	// Try again one more time in case of random network issue
88	timestamp = download(txtFile, "http://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
89	}
90	final File htmlFile = new File(System.getProperty("java.io.tmpdir"), "tlds-alpha-by-domain.html");
91	// N.B. sometimes the html file may be updated a day or so after the txt file
92	// if the txt file contains entries not found in the html file, try again in a day or two
93	download(htmlFile, "http://www.iana.org/domains/root/db", timestamp);
94
95	try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(txtFile), StandardCharsets.UTF_8))) {
96	String line;
97	final String header;
98	line = br.readLine(); // header
99	if (line != null && line.startsWith("# Version ")) {
100	header = line.substring(2);
101	} else {
102	throw new IOException("File does not have expected Version header");
103	}
104	final boolean generateUnicodeTlds = false; // Change this to generate Unicode TLDs as well
105
106	// Parse html page to get entries
107	Map<String, String[]> htmlInfo = getHtmlInfo(htmlFile);
108	Map<String, String> missingTLD = new TreeMap<>(); // stores entry and comments as String[]
109	Map<String, String> missingCC = new TreeMap<>();
110	while ((line = br.readLine()) != null) {
111	if (!line.startsWith("#")) {
112	final String unicodeTld; // only different from asciiTld if that was punycode
113	final String asciiTld = line.toLowerCase(Locale.ENGLISH);
114	if (line.startsWith("XN--")) {
115	unicodeTld = IDN.toUnicode(line);
116	} else {
117	unicodeTld = asciiTld;
118	}
119	if (!dv.isValidTld(asciiTld)) {
120	String[] info = htmlInfo.get(asciiTld);
121	if (info != null) {
122	String type = info[0];
123	String comment = info[1];
124	if ("country-code".equals(type)) { // Which list to use?
125	missingCC.put(asciiTld, unicodeTld + " " + comment);
126	if (generateUnicodeTlds) {
127	missingCC.put(unicodeTld, asciiTld + " " + comment);
128	}
129	} else {
130	missingTLD.put(asciiTld, unicodeTld + " " + comment);
131	if (generateUnicodeTlds) {
132	missingTLD.put(unicodeTld, asciiTld + " " + comment);
133	}
134	}
135	} else {
136	System.err.println("Expected to find HTML info for "+ asciiTld);
137	}
138	}
139	ianaTlds.add(asciiTld);
140	// Don't merge these conditions; generateUnicodeTlds is final so needs to be separate to avoid a warning
141	if (generateUnicodeTlds) {
142	if (!unicodeTld.equals(asciiTld)) {
143	ianaTlds.add(unicodeTld);
144	}
145	}
146	}
147	}
148	// List html entries not in TLD text list
149	for (String key : (new TreeMap<>(htmlInfo)).keySet()) {
150	if (!ianaTlds.contains(key)) {
151	if (isNotInRootZone(key)) {
152	System.out.println("INFO: HTML entry not yet in root zone: "+key);
153	} else {
154	System.err.println("WARN: Expected to find text entry for html: "+key);
155	}
156	}
157	}
158	if (!missingTLD.isEmpty()) {
159	printMap(header, missingTLD, "TLD");
160	fail("missing TLD");
161	}
162	if (!missingCC.isEmpty()) {
163	printMap(header, missingCC, "CC");
164	fail("missing CC");
165	}
166	}
167	// Check if internal tables contain any additional entries
168	assertTrue(isInIanaList("INFRASTRUCTURE_TLDS", ianaTlds));
169	assertTrue(isInIanaList("COUNTRY_CODE_TLDS", ianaTlds));
170	assertTrue(isInIanaList("GENERIC_TLDS", ianaTlds));
171	// Don't check local TLDS assertTrue(isInIanaList("LOCAL_TLDS", ianaTlds));
172	}
173
174	private static void printMap(final String header, Map<String, String> map, String string) {
175	System.out.println("Entries missing from "+ string +" List\n");
176	if (header != null) {
177	System.out.println(" // Taken from " + header);
178	}
179	Iterator<Map.Entry<String, String>> it = map.entrySet().iterator();
180	while (it.hasNext()) {
181	Map.Entry<String, String> me = it.next();
182	System.out.println(" \"" + me.getKey() + "\", // " + me.getValue());
183	}
184	System.out.println("\nDone");
185	}
186
187	@SuppressFBWarnings(value = "PERFORMANCE")
188	private static Map<String, String[]> getHtmlInfo(final File f) throws IOException {
189	final Map<String, String[]> info = new HashMap<>();
190
191	// <td><span class="domain tld"><a href="/domains/root/db/ax.html">.ax</a></span></td>
192	final Pattern domain = Pattern.compile(".*<a href=\"/domains/root/db/([^.]+)\\.html");
193	// <td>country-code</td>
194	final Pattern type = Pattern.compile("\\s+<td>([^<]+)</td>");
195	// <!-- <td>Åland Islands<br/><span class="tld-table-so">Ålands landskapsregering</span></td> </td> -->
196	// <td>Ålands landskapsregering</td>
197	final Pattern comment = Pattern.compile("\\s+<td>([^<]+)</td>");
198
199	try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8))) {
200	String line;
201	while ((line = br.readLine()) != null) {
202	Matcher m = domain.matcher(line);
203	if (m.lookingAt()) {
204	String dom = m.group(1);
205	String typ = "??";
206	String com = "??";
207	line = br.readLine();
208	while (line != null && line.matches("^\\s*$")) { // extra blank lines introduced
209	line = br.readLine();
210	}
211	Matcher t = type.matcher(line);
212	if (t.lookingAt()) {
213	typ = t.group(1);
214	line = br.readLine();
215	if (line != null && line.matches("\\s+<!--.*")) {
216	while (line != null && !line.matches(".-->.")) {
217	line = br.readLine();
218	}
219	line = br.readLine();
220	}
221	// Should have comment; is it wrapped?
222	while (line != null && !line.matches(".</td>.")) {
223	line += " " +br.readLine();
224	}
225	Matcher n = comment.matcher(line);
226	if (n.lookingAt()) {
227	com = n.group(1);
228	}
229	// Don't save unused entries
230	if (com.contains("Not assigned") \|\| com.contains("Retired") \|\| typ.equals("test")) {
231	// System.out.println("Ignored: " + typ + " " + dom + " " +com);
232	} else {
233	info.put(dom.toLowerCase(Locale.ENGLISH), new String[]{typ, com});
234	// System.out.println("Storing: " + typ + " " + dom + " " +com);
235	}
236	} else {
237	System.err.println("Unexpected type: " + line);
238	}
239	}
240	}
241	}
242	return info;
243	}
244
245	/*
246	* Download a file if it is more recent than our cached copy.
247	* Unfortunately the server does not seem to honour If-Modified-Since for the
248	* Html page, so we check if it is newer than the txt file and skip download if so
249	*/
250	private static long download(File f, String tldurl, long timestamp) throws IOException {
251	final int HOUR = 60601000; // an hour in ms
252	final long modTime;
253	// For testing purposes, don't download files more than once an hour
254	if (f.canRead()) {
255	modTime = f.lastModified();
256	if (modTime > System.currentTimeMillis()-HOUR) {
257	System.out.println("Skipping download - found recent " + f);
258	return modTime;
259	}
260	} else {
261	modTime = 0;
262	}
263	HttpURLConnection hc = (HttpURLConnection) new URL(tldurl).openConnection();
264	if (modTime > 0) {
265	SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z"); //Sun, 06 Nov 1994 08:49:37 GMT
266	String since = sdf.format(new Date(modTime));
267	hc.addRequestProperty("If-Modified-Since", since);
268	System.out.println("Found " + f + " with date " + since);
269	}
270	if (hc.getResponseCode() == 304) {
271	System.out.println("Already have most recent " + tldurl);
272	} else {
273	System.out.println("Downloading " + tldurl);
274	byte[] buff = new byte[1024];
275	try (InputStream is = hc.getInputStream();
276	FileOutputStream fos = new FileOutputStream(f)) {
277	int len;
278	while ((len = is.read(buff)) != -1) {
279	fos.write(buff, 0, len);
280	}
281	}
282	System.out.println("Done");
283	}
284	return f.lastModified();
285	}
286
287	/**
288	* Check whether the domain is in the root zone currently.
289	* Reads the URL http://www.iana.org/domains/root/db/domain.html
290	* (using a local disk cache)
291	* and checks for the string "This domain is not present in the root zone at this time."
292	* @param domain the domain to check
293	* @return true if the string is found
294	*/
295	private static boolean isNotInRootZone(String domain) {
296	String tldurl = "http://www.iana.org/domains/root/db/" + domain + ".html";
297	BufferedReader in = null;
298	try {
299	File rootCheck = new File(System.getProperty("java.io.tmpdir"), "tld_" + domain + ".html");
300	download(rootCheck, tldurl, 0L);
301	in = new BufferedReader(new InputStreamReader(new FileInputStream(rootCheck), StandardCharsets.UTF_8));
302	String inputLine;
303	while ((inputLine = in.readLine()) != null) {
304	if (inputLine.contains("This domain is not present in the root zone at this time.")) {
305	return true;
306	}
307	}
308	in.close();
309	} catch (IOException e) {
310	e.printStackTrace();
311	} finally {
312	closeQuietly(in);
313	}
314	return false;
315	}
316
317	private static void closeQuietly(Closeable in) {
318	if (in != null) {
319	try {
320	in.close();
321	} catch (IOException e) {
322	e.printStackTrace();
323	}
324	}
325	}
326
327	// isInIanaList and isSorted are split into two methods.
328	// If/when access to the arrays is possible without reflection, the intermediate
329	// methods can be dropped
330	private static boolean isInIanaList(String arrayName, Set<String> ianaTlds) throws Exception {
331	Field f = DomainValidator.class.getDeclaredField(arrayName);
332	final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
333	if (isPrivate) {
334	f.setAccessible(true);
335	}
336	String[] array = (String[]) f.get(null);
337	try {
338	return isInIanaList(arrayName, array, ianaTlds);
339	} finally {
340	if (isPrivate) {
341	f.setAccessible(false);
342	}
343	}
344	}
345
346	private static boolean isInIanaList(String name, String[] array, Set<String> ianaTlds) {
347	boolean ok = true;
348	for (int i = 0; i < array.length; i++) {
349	if (!ianaTlds.contains(array[i])) {
350	System.out.println(name + " contains unexpected value: " + array[i]);
351	ok = false;
352	}
353	}
354	return ok;
355	}
356
357	private static boolean isSortedLowerCase(String arrayName) throws Exception {
358	Field f = DomainValidator.class.getDeclaredField(arrayName);
359	final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
360	if (isPrivate) {
361	f.setAccessible(true);
362	}
363	String[] array = (String[]) f.get(null);
364	try {
365	return isSortedLowerCase(arrayName, array);
366	} finally {
367	if (isPrivate) {
368	f.setAccessible(false);
369	}
370	}
371	}
372
373	private static boolean isLowerCase(String string) {
374	return string.equals(string.toLowerCase(Locale.ENGLISH));
375	}
376
377	// Check if an array is strictly sorted - and lowerCase
378	private static boolean isSortedLowerCase(String name, String[] array) {
379	boolean sorted = true;
380	boolean strictlySorted = true;
381	final int length = array.length;
382	boolean lowerCase = isLowerCase(array[length-1]); // Check the last entry
383	for (int i = 0; i < length-1; i++) { // compare all but last entry with next
384	final String entry = array[i];
385	final String nextEntry = array[i+1];
386	final int cmp = entry.compareTo(nextEntry);
387	if (cmp > 0) { // out of order
388	System.out.println("Out of order entry: " + entry + " < " + nextEntry + " in " + name);
389	sorted = false;
390	} else if (cmp == 0) {
391	strictlySorted = false;
392	System.out.println("Duplicated entry: " + entry + " in " + name);
393	}
394	if (!isLowerCase(entry)) {
395	System.out.println("Non lowerCase entry: " + entry + " in " + name);
396	lowerCase = false;
397	}
398	}
399	return sorted && strictlySorted && lowerCase;
400	}
401	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: