source: josm/trunk/src/org/openstreetmap/josm/data/cache/JCSCachedTileLoaderJob.java@ 13230

Last change on this file since 13230 was 13230, checked in by Don-vip, 6 years ago

fix #15690 - parse error message sent by Tomcat in case of WMS tile loading error

  • Property svn:eol-style set to native
File size: 21.9 KB
Line 
// License: GPL. For details, see LICENSE file.
package org.openstreetmap.josm.data.cache;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.security.SecureRandom;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.jcs.access.behavior.ICacheAccess;
import org.apache.commons.jcs.engine.behavior.ICacheElement;
import org.openstreetmap.josm.data.cache.ICachedLoaderListener.LoadResult;
import org.openstreetmap.josm.data.preferences.IntegerProperty;
import org.openstreetmap.josm.tools.CheckParameterUtil;
import org.openstreetmap.josm.tools.HttpClient;
import org.openstreetmap.josm.tools.Logging;
import org.openstreetmap.josm.tools.Utils;

30/**
31 * Generic loader for HTTP based tiles. Uses custom attribute, to check, if entry has expired
32 * according to HTTP headers sent with tile. If so, it tries to verify using Etags
33 * or If-Modified-Since / Last-Modified.
34 *
35 * If the tile is not valid, it will try to download it from remote service and put it
36 * to cache. If remote server will fail it will try to use stale entry.
37 *
38 * This class will keep only one Job running for specified tile. All others will just finish, but
39 * listeners will be gathered and notified, once download job will be finished
40 *
41 * @author Wiktor Niesiobędzki
42 * @param <K> cache entry key type
43 * @param <V> cache value type
44 * @since 8168
45 */
46public abstract class JCSCachedTileLoaderJob<K, V extends CacheEntry> implements ICachedLoaderJob<K> {
47 protected static final long DEFAULT_EXPIRE_TIME = TimeUnit.DAYS.toMillis(7);
48 // Limit for the max-age value send by the server.
49 protected static final long EXPIRE_TIME_SERVER_LIMIT = TimeUnit.DAYS.toMillis(28);
50 // Absolute expire time limit. Cached tiles that are older will not be used,
51 // even if the refresh from the server fails.
52 protected static final long ABSOLUTE_EXPIRE_TIME_LIMIT = TimeUnit.DAYS.toMillis(365);
53
54 // Pattern to detect Tomcat error message. Be careful with change of format:
55 // CHECKSTYLE.OFF: LineLength
56 // https://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/valves/ErrorReportValve.java?r1=1740707&r2=1779641&pathrev=1779641&diff_format=h
57 // CHECKSTYLE.ON: LineLength
58 protected static final Pattern TOMCAT_ERR_MESSAGE = Pattern.compile(
59 ".*<p><b>[^<]+</b>[^<]+</p><p><b>[^<]+</b> (?:<u>)?([^<]*)(?:</u>)?</p><p><b>[^<]+</b> (?:<u>)?[^<]*(?:</u>)?</p>.*",
60 Pattern.CASE_INSENSITIVE);
61
62 /**
63 * maximum download threads that will be started
64 */
65 public static final IntegerProperty THREAD_LIMIT = new IntegerProperty("cache.jcs.max_threads", 10);
66
67 /*
68 * ThreadPoolExecutor starts new threads, until THREAD_LIMIT is reached. Then it puts tasks into LinkedBlockingDeque.
69 *
70 * The queue works FIFO, so one needs to take care about ordering of the entries submitted
71 *
72 * There is no point in canceling tasks, that are already taken by worker threads (if we made so much effort, we can at least cache
73 * the response, so later it could be used). We could actually cancel what is in LIFOQueue, but this is a tradeoff between simplicity
74 * and performance (we do want to have something to offer to worker threads before tasks will be resubmitted by class consumer)
75 */
76
77 private static final ThreadPoolExecutor DEFAULT_DOWNLOAD_JOB_DISPATCHER = new ThreadPoolExecutor(
78 1, // we have a small queue, so threads will be quickly started (threads are started only, when queue is full)
79 THREAD_LIMIT.get(), // do not this number of threads
80 30, // keepalive for thread
81 TimeUnit.SECONDS,
82 // make queue of LIFO type - so recently requested tiles will be loaded first (assuming that these are which user is waiting to see)
83 new LinkedBlockingDeque<Runnable>(),
84 Utils.newThreadFactory("JCS-downloader-%d", Thread.NORM_PRIORITY)
85 );
86
87 private static final ConcurrentMap<String, Set<ICachedLoaderListener>> inProgress = new ConcurrentHashMap<>();
88 private static final ConcurrentMap<String, Boolean> useHead = new ConcurrentHashMap<>();
89
90 protected final long now; // when the job started
91
92 private final ICacheAccess<K, V> cache;
93 private ICacheElement<K, V> cacheElement;
94 protected V cacheData;
95 protected CacheEntryAttributes attributes;
96
97 // HTTP connection parameters
98 private final int connectTimeout;
99 private final int readTimeout;
100 private final Map<String, String> headers;
101 private final ThreadPoolExecutor downloadJobExecutor;
102 private Runnable finishTask;
103 private boolean force;
104
105 /**
106 * @param cache cache instance that we will work on
107 * @param headers HTTP headers to be sent together with request
108 * @param readTimeout when connecting to remote resource
109 * @param connectTimeout when connecting to remote resource
110 * @param downloadJobExecutor that will be executing the jobs
111 */
112 public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
113 int connectTimeout, int readTimeout,
114 Map<String, String> headers,
115 ThreadPoolExecutor downloadJobExecutor) {
116 CheckParameterUtil.ensureParameterNotNull(cache, "cache");
117 this.cache = cache;
118 this.now = System.currentTimeMillis();
119 this.connectTimeout = connectTimeout;
120 this.readTimeout = readTimeout;
121 this.headers = headers;
122 this.downloadJobExecutor = downloadJobExecutor;
123 }
124
125 /**
126 * @param cache cache instance that we will work on
127 * @param headers HTTP headers to be sent together with request
128 * @param readTimeout when connecting to remote resource
129 * @param connectTimeout when connecting to remote resource
130 */
131 public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
132 int connectTimeout, int readTimeout,
133 Map<String, String> headers) {
134 this(cache, connectTimeout, readTimeout,
135 headers, DEFAULT_DOWNLOAD_JOB_DISPATCHER);
136 }
137
138 private void ensureCacheElement() {
139 if (cacheElement == null && getCacheKey() != null) {
140 cacheElement = cache.getCacheElement(getCacheKey());
141 if (cacheElement != null) {
142 attributes = (CacheEntryAttributes) cacheElement.getElementAttributes();
143 cacheData = cacheElement.getVal();
144 }
145 }
146 }
147
148 @Override
149 public V get() {
150 ensureCacheElement();
151 return cacheData;
152 }
153
154 @Override
155 public void submit(ICachedLoaderListener listener, boolean force) throws IOException {
156 this.force = force;
157 boolean first = false;
158 URL url = getUrl();
159 String deduplicationKey = null;
160 if (url != null) {
161 // url might be null, for example when Bing Attribution is not loaded yet
162 deduplicationKey = url.toString();
163 }
164 if (deduplicationKey == null) {
165 Logging.warn("No url returned for: {0}, skipping", getCacheKey());
166 throw new IllegalArgumentException("No url returned");
167 }
168 synchronized (inProgress) {
169 Set<ICachedLoaderListener> newListeners = inProgress.get(deduplicationKey);
170 if (newListeners == null) {
171 newListeners = new HashSet<>();
172 inProgress.put(deduplicationKey, newListeners);
173 first = true;
174 }
175 newListeners.add(listener);
176 }
177
178 if (first || force) {
179 // submit all jobs to separate thread, so calling thread is not blocked with IO when loading from disk
180 Logging.debug("JCS - Submitting job for execution for url: {0}", getUrlNoException());
181 downloadJobExecutor.execute(this);
182 }
183 }
184
185 /**
186 * This method is run when job has finished
187 */
188 protected void executionFinished() {
189 if (finishTask != null) {
190 finishTask.run();
191 }
192 }
193
194 /**
195 *
196 * @return checks if object from cache has sufficient data to be returned
197 */
198 protected boolean isObjectLoadable() {
199 if (cacheData == null) {
200 return false;
201 }
202 return cacheData.getContent().length > 0;
203 }
204
205 /**
206 * Simple implementation. All errors should be cached as empty. Though some JDK (JDK8 on Windows for example)
207 * doesn't return 4xx error codes, instead they do throw an FileNotFoundException or IOException
208 *
209 * @return true if we should put empty object into cache, regardless of what remote resource has returned
210 */
211 protected boolean cacheAsEmpty() {
212 return attributes.getResponseCode() < 500;
213 }
214
215 /**
216 * @return key under which discovered server settings will be kept
217 */
218 protected String getServerKey() {
219 try {
220 return getUrl().getHost();
221 } catch (IOException e) {
222 Logging.trace(e);
223 return null;
224 }
225 }
226
227 @Override
228 public void run() {
229 final Thread currentThread = Thread.currentThread();
230 final String oldName = currentThread.getName();
231 currentThread.setName("JCS Downloading: " + getUrlNoException());
232 Logging.debug("JCS - starting fetch of url: {0} ", getUrlNoException());
233 ensureCacheElement();
234 try {
235 // try to fetch from cache
236 if (!force && cacheElement != null && isCacheElementValid() && isObjectLoadable()) {
237 // we got something in cache, and it's valid, so lets return it
238 Logging.debug("JCS - Returning object from cache: {0}", getCacheKey());
239 finishLoading(LoadResult.SUCCESS);
240 return;
241 }
242
243 // try to load object from remote resource
244 if (loadObject()) {
245 finishLoading(LoadResult.SUCCESS);
246 } else {
247 // if loading failed - check if we can return stale entry
248 if (isObjectLoadable()) {
249 // try to get stale entry in cache
250 finishLoading(LoadResult.SUCCESS);
251 Logging.debug("JCS - found stale object in cache: {0}", getUrlNoException());
252 } else {
253 // failed completely
254 finishLoading(LoadResult.FAILURE);
255 }
256 }
257 } finally {
258 executionFinished();
259 currentThread.setName(oldName);
260 }
261 }
262
263 private void finishLoading(LoadResult result) {
264 Set<ICachedLoaderListener> listeners;
265 synchronized (inProgress) {
266 try {
267 listeners = inProgress.remove(getUrl().toString());
268 } catch (IOException e) {
269 listeners = null;
270 Logging.trace(e);
271 }
272 }
273 if (listeners == null) {
274 Logging.warn("Listener not found for URL: {0}. Listener not notified!", getUrlNoException());
275 return;
276 }
277 for (ICachedLoaderListener l: listeners) {
278 l.loadingFinished(cacheData, attributes, result);
279 }
280 }
281
282 protected boolean isCacheElementValid() {
283 long expires = attributes.getExpirationTime();
284
285 // check by expire date set by server
286 if (expires != 0L) {
287 // put a limit to the expire time (some servers send a value
288 // that is too large)
289 expires = Math.min(expires, attributes.getCreateTime() + EXPIRE_TIME_SERVER_LIMIT);
290 if (now > expires) {
291 Logging.debug("JCS - Object {0} has expired -> valid to {1}, now is: {2}",
292 new Object[]{getUrlNoException(), Long.toString(expires), Long.toString(now)});
293 return false;
294 }
295 } else if (attributes.getLastModification() > 0 &&
296 now - attributes.getLastModification() > DEFAULT_EXPIRE_TIME) {
297 // check by file modification date
298 Logging.debug("JCS - Object has expired, maximum file age reached {0}", getUrlNoException());
299 return false;
300 } else if (now - attributes.getCreateTime() > DEFAULT_EXPIRE_TIME) {
301 Logging.debug("JCS - Object has expired, maximum time since object creation reached {0}", getUrlNoException());
302 return false;
303 }
304 return true;
305 }
306
307 /**
308 * @return true if object was successfully downloaded, false, if there was a loading failure
309 */
310 private boolean loadObject() {
311 if (attributes == null) {
312 attributes = new CacheEntryAttributes();
313 }
314 try {
315 // if we have object in cache, and host doesn't support If-Modified-Since nor If-None-Match
316 // then just use HEAD request and check returned values
317 if (isObjectLoadable() &&
318 Boolean.TRUE.equals(useHead.get(getServerKey())) &&
319 isCacheValidUsingHead()) {
320 Logging.debug("JCS - cache entry verified using HEAD request: {0}", getUrl());
321 return true;
322 }
323
324 Logging.debug("JCS - starting HttpClient GET request for URL: {0}", getUrl());
325 final HttpClient request = getRequest("GET", true);
326
327 if (isObjectLoadable() &&
328 (now - attributes.getLastModification()) <= ABSOLUTE_EXPIRE_TIME_LIMIT) {
329 request.setIfModifiedSince(attributes.getLastModification());
330 }
331 if (isObjectLoadable() && attributes.getEtag() != null) {
332 request.setHeader("If-None-Match", attributes.getEtag());
333 }
334
335 final HttpClient.Response urlConn = request.connect();
336
337 if (urlConn.getResponseCode() == 304) {
338 // If isModifiedSince or If-None-Match has been set
339 // and the server answers with a HTTP 304 = "Not Modified"
340 Logging.debug("JCS - If-Modified-Since/ETag test: local version is up to date: {0}", getUrl());
341 return true;
342 } else if (isObjectLoadable() // we have an object in cache, but we haven't received 304 response code
343 && (
344 (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
345 attributes.getLastModification() == urlConn.getLastModified())
346 ) {
347 // we sent ETag or If-Modified-Since, but didn't get 304 response code
348 // for further requests - use HEAD
349 String serverKey = getServerKey();
350 Logging.info("JCS - Host: {0} found not to return 304 codes for If-Modified-Since or If-None-Match headers",
351 serverKey);
352 useHead.put(serverKey, Boolean.TRUE);
353 }
354
355 attributes = parseHeaders(urlConn);
356
357 for (int i = 0; i < 5; ++i) {
358 if (urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
359 Thread.sleep(5000L+new SecureRandom().nextInt(5000));
360 continue;
361 }
362
363 attributes.setResponseCode(urlConn.getResponseCode());
364 byte[] raw;
365 if (urlConn.getResponseCode() == HttpURLConnection.HTTP_OK) {
366 raw = Utils.readBytesFromStream(urlConn.getContent());
367 } else {
368 raw = new byte[]{};
369 try {
370 String data = urlConn.fetchContent();
371 if (!data.isEmpty()) {
372 Matcher m = TOMCAT_ERR_MESSAGE.matcher(data);
373 if (m.matches()) {
374 attributes.setErrorMessage(m.group(1).replace("'", "''"));
375 }
376 }
377 } catch (IOException e) {
378 Logging.warn(e);
379 }
380 }
381
382 if (isResponseLoadable(urlConn.getHeaderFields(), urlConn.getResponseCode(), raw)) {
383 // we need to check cacheEmpty, so for cases, when data is returned, but we want to store
384 // as empty (eg. empty tile images) to save some space
385 cacheData = createCacheEntry(raw);
386 cache.put(getCacheKey(), cacheData, attributes);
387 Logging.debug("JCS - downloaded key: {0}, length: {1}, url: {2}",
388 new Object[] {getCacheKey(), raw.length, getUrl()});
389 return true;
390 } else if (cacheAsEmpty()) {
391 cacheData = createCacheEntry(new byte[]{});
392 cache.put(getCacheKey(), cacheData, attributes);
393 Logging.debug("JCS - Caching empty object {0}", getUrl());
394 return true;
395 } else {
396 Logging.debug("JCS - failure during load - reponse is not loadable nor cached as empty");
397 return false;
398 }
399 }
400 } catch (FileNotFoundException e) {
401 Logging.debug("JCS - Caching empty object as server returned 404 for: {0}", getUrlNoException());
402 attributes.setResponseCode(404);
403 attributes.setError(e);
404 boolean doCache = isResponseLoadable(null, 404, null) || cacheAsEmpty();
405 if (doCache) {
406 cacheData = createCacheEntry(new byte[]{});
407 cache.put(getCacheKey(), cacheData, attributes);
408 }
409 return doCache;
410 } catch (IOException e) {
411 Logging.debug("JCS - IOException during communication with server for: {0}", getUrlNoException());
412 if (isObjectLoadable()) {
413 return true;
414 } else {
415 attributes.setError(e);
416 attributes.setResponseCode(599); // set dummy error code, greater than 500 so it will be not cached
417 return false;
418 }
419
420 } catch (InterruptedException e) {
421 attributes.setError(e);
422 Logging.logWithStackTrace(Logging.LEVEL_WARN, e, "JCS - Exception during download {0}", getUrlNoException());
423 Thread.currentThread().interrupt();
424 }
425 Logging.warn("JCS - Silent failure during download: {0}", getUrlNoException());
426 return false;
427 }
428
429 /**
430 * Check if the object is loadable. This means, if the data will be parsed, and if this response
431 * will finish as successful retrieve.
432 *
433 * This simple implementation doesn't load empty response, nor client (4xx) and server (5xx) errors
434 *
435 * @param headerFields headers sent by server
436 * @param responseCode http status code
437 * @param raw data read from server
438 * @return true if object should be cached and returned to listener
439 */
440 protected boolean isResponseLoadable(Map<String, List<String>> headerFields, int responseCode, byte[] raw) {
441 return raw != null && raw.length != 0 && responseCode < 400;
442 }
443
444 protected abstract V createCacheEntry(byte[] content);
445
446 protected CacheEntryAttributes parseHeaders(HttpClient.Response urlConn) {
447 CacheEntryAttributes ret = new CacheEntryAttributes();
448
449 Long lng = urlConn.getExpiration();
450 if (lng.equals(0L)) {
451 try {
452 String str = urlConn.getHeaderField("Cache-Control");
453 if (str != null) {
454 for (String token: str.split(",")) {
455 if (token.startsWith("max-age=")) {
456 lng = TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring(8))) + System.currentTimeMillis();
457 }
458 }
459 }
460 } catch (NumberFormatException e) {
461 // ignore malformed Cache-Control headers
462 Logging.trace(e);
463 }
464 }
465
466 ret.setExpirationTime(lng);
467 ret.setLastModification(now);
468 ret.setEtag(urlConn.getHeaderField("ETag"));
469
470 return ret;
471 }
472
473 private HttpClient getRequest(String requestMethod, boolean noCache) throws IOException {
474 final HttpClient urlConn = HttpClient.create(getUrl(), requestMethod);
475 urlConn.setAccept("text/html, image/png, image/jpeg, image/gif, */*");
476 urlConn.setReadTimeout(readTimeout); // 30 seconds read timeout
477 urlConn.setConnectTimeout(connectTimeout);
478 if (headers != null) {
479 urlConn.setHeaders(headers);
480 }
481
482 if (force || noCache) {
483 urlConn.useCache(false);
484 }
485 return urlConn;
486 }
487
488 private boolean isCacheValidUsingHead() throws IOException {
489 final HttpClient.Response urlConn = getRequest("HEAD", false).connect();
490 long lastModified = urlConn.getLastModified();
491 return (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
492 (lastModified != 0 && lastModified <= attributes.getLastModification());
493 }
494
495 /**
496 * TODO: move to JobFactory
497 * cancels all outstanding tasks in the queue.
498 */
499 public void cancelOutstandingTasks() {
500 for (Runnable r: downloadJobExecutor.getQueue()) {
501 if (downloadJobExecutor.remove(r) && r instanceof JCSCachedTileLoaderJob) {
502 ((JCSCachedTileLoaderJob<?, ?>) r).handleJobCancellation();
503 }
504 }
505 }
506
507 /**
508 * Sets a job, that will be run, when job will finish execution
509 * @param runnable that will be executed
510 */
511 public void setFinishedTask(Runnable runnable) {
512 this.finishTask = runnable;
513
514 }
515
516 /**
517 * Marks this job as canceled
518 */
519 public void handleJobCancellation() {
520 finishLoading(LoadResult.CANCELED);
521 }
522
523 private URL getUrlNoException() {
524 try {
525 return getUrl();
526 } catch (IOException e) {
527 return null;
528 }
529 }
530}
Note: See TracBrowser for help on using the repository browser.