1 | /** |
---|
2 | * This software is copyright (c) 2014 by |
---|
3 | * - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) |
---|
4 | * This is free software. You can redistribute it |
---|
5 | * and/or modify it under the terms described in |
---|
6 | * the GNU General Public License v3 of which you |
---|
7 | * should have received a copy. Otherwise you can download |
---|
8 | * it from |
---|
9 | * |
---|
10 | * http://www.gnu.org/licenses/gpl-3.0.txt |
---|
11 | * |
---|
12 | * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) |
---|
13 | * |
---|
14 | * @license http://www.gnu.org/licenses/gpl-3.0.txt |
---|
15 | * GNU General Public License v3 |
---|
16 | */ |
---|
17 | package eu.clarin.cmdi.validator; |
---|
18 | |
---|
19 | import java.io.File; |
---|
20 | import java.io.FileInputStream; |
---|
21 | import java.io.FileOutputStream; |
---|
22 | import java.io.IOException; |
---|
23 | import java.io.InputStream; |
---|
24 | import java.io.InterruptedIOException; |
---|
25 | import java.net.URI; |
---|
26 | import java.net.URISyntaxException; |
---|
27 | import java.nio.channels.FileLock; |
---|
28 | import java.util.HashSet; |
---|
29 | import java.util.Set; |
---|
30 | import java.util.concurrent.TimeUnit; |
---|
31 | |
---|
32 | import javax.xml.XMLConstants; |
---|
33 | |
---|
34 | import org.apache.http.HttpEntity; |
---|
35 | import org.apache.http.HttpResponse; |
---|
36 | import org.apache.http.HttpStatus; |
---|
37 | import org.apache.http.StatusLine; |
---|
38 | import org.apache.http.client.config.CookieSpecs; |
---|
39 | import org.apache.http.client.config.RequestConfig; |
---|
40 | import org.apache.http.client.methods.CloseableHttpResponse; |
---|
41 | import org.apache.http.client.methods.HttpGet; |
---|
42 | import org.apache.http.config.SocketConfig; |
---|
43 | import org.apache.http.conn.ConnectionKeepAliveStrategy; |
---|
44 | import org.apache.http.impl.client.CloseableHttpClient; |
---|
45 | import org.apache.http.impl.client.HttpClients; |
---|
46 | import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; |
---|
47 | import org.apache.http.protocol.BasicHttpContext; |
---|
48 | import org.apache.http.protocol.HttpContext; |
---|
49 | import org.slf4j.Logger; |
---|
50 | import org.slf4j.LoggerFactory; |
---|
51 | |
---|
52 | public final class CMDISchemaLoader { |
---|
53 | public static final long DISABLE_CACHE_AGING = -1; |
---|
54 | private static final Logger logger = |
---|
55 | LoggerFactory.getLogger(CMDISchemaLoader.class); |
---|
56 | private static final String USER_AGENT = |
---|
57 | "CMDI-Validator-SchemaLoader/" + Version.getVersion(); |
---|
58 | private static final String XML_XSD_RESSOURCE = "/xml.xsd"; |
---|
59 | private static final String EXTENSION_XSD = "xsd"; |
---|
60 | private static final String EXTENSION_ERROR = "error"; |
---|
61 | private final File cacheDirectory; |
---|
62 | private final long maxCacheAge; |
---|
63 | private final long maxNegativeCacheAge; |
---|
64 | private final CloseableHttpClient httpClient; |
---|
65 | private final Set<String> pending = new HashSet<String>(128); |
---|
66 | private final Object guard = new Object(); |
---|
67 | private final Object waiter = new Object(); |
---|
68 | |
---|
69 | |
---|
/**
 * Creates a schema loader with explicit cache and network settings.
 *
 * @param cacheDirectory
 *            directory in which schema files and error markers are cached
 * @param maxCacheAge
 *            maximum age of a cached schema in milliseconds, or
 *            {@link #DISABLE_CACHE_AGING} to never expire cached schemas
 * @param maxNegativeCacheAge
 *            maximum age of a cached download failure in milliseconds, or
 *            {@link #DISABLE_CACHE_AGING} to never expire failures
 * @param connectTimeout
 *            connect timeout handed to the HTTP client's request
 *            configuration
 * @param socketTimeout
 *            socket timeout handed to the HTTP client's request
 *            configuration
 * @throws NullPointerException
 *             if {@code cacheDirectory} is {@code null}
 * @throws IllegalArgumentException
 *             if one of the maximum ages is smaller than -1
 */
public CMDISchemaLoader(File cacheDirectory, long maxCacheAge,
        long maxNegativeCacheAge, int connectTimeout,
        int socketTimeout) {
    if (cacheDirectory == null) {
        throw new NullPointerException("cacheDirectory == null");
    }
    // DISABLE_CACHE_AGING (-1) is the smallest legal value for both ages
    if (maxCacheAge < DISABLE_CACHE_AGING) {
        throw new IllegalArgumentException("maxCacheAge < -1");
    }
    if (maxNegativeCacheAge < DISABLE_CACHE_AGING) {
        throw new IllegalArgumentException("maxNegativeCacheAge < -1");
    }

    this.httpClient = createHttpClient(connectTimeout, socketTimeout);
    this.maxNegativeCacheAge = maxNegativeCacheAge;
    this.maxCacheAge = maxCacheAge;
    this.cacheDirectory = cacheDirectory;
}
---|
87 | |
---|
88 | |
---|
/**
 * Creates a schema loader that remembers download failures for one hour.
 *
 * @param cacheDirectory
 *            directory in which schema files and error markers are cached
 * @param maxCacheAge
 *            maximum age of a cached schema in milliseconds, or
 *            {@link #DISABLE_CACHE_AGING} to never expire cached schemas
 * @param connectTimeout
 *            connect timeout handed to the HTTP client's request
 *            configuration
 * @param socketTimeout
 *            socket timeout handed to the HTTP client's request
 *            configuration
 */
public CMDISchemaLoader(File cacheDirectory, long maxCacheAge, int connectTimeout,
        int socketTimeout) {
    this(cacheDirectory,
            maxCacheAge,
            TimeUnit.HOURS.toMillis(1), /* negative cache age */
            connectTimeout,
            socketTimeout);
}
---|
94 | |
---|
/**
 * Creates a schema loader that remembers download failures for one hour
 * and uses connect and socket timeouts of 60000 each.
 *
 * @param cacheDirectory
 *            directory in which schema files and error markers are cached
 * @param maxCacheAge
 *            maximum age of a cached schema in milliseconds, or
 *            {@link #DISABLE_CACHE_AGING} to never expire cached schemas
 */
public CMDISchemaLoader(File cacheDirectory, long maxCacheAge) {
    // the four-argument constructor supplies the one hour negative cache age
    this(cacheDirectory, maxCacheAge, 60000, 60000);
}
---|
98 | |
---|
99 | |
---|
/**
 * Creates a schema loader whose cached schemas never expire.
 *
 * @param cacheDirectory
 *            directory in which schema files and error markers are cached
 */
public CMDISchemaLoader(File cacheDirectory) {
    this(cacheDirectory, DISABLE_CACHE_AGING /* cached schemas never expire */);
}
---|
103 | |
---|
104 | |
---|
105 | public InputStream loadSchemaFile(String targetNamespace, |
---|
106 | String schemaLocation) throws IOException { |
---|
107 | if (targetNamespace == null) { |
---|
108 | throw new NullPointerException("targetNamespace == null"); |
---|
109 | } |
---|
110 | if (schemaLocation == null) { |
---|
111 | throw new NullPointerException("schemaLocation == null"); |
---|
112 | } |
---|
113 | |
---|
114 | logger.trace("loading schema: targetNamespace={}, location={}", |
---|
115 | targetNamespace, schemaLocation); |
---|
116 | InputStream stream = null; |
---|
117 | if (XMLConstants.XML_NS_URI.equalsIgnoreCase(targetNamespace)) { |
---|
118 | stream = this.getClass().getResourceAsStream(XML_XSD_RESSOURCE); |
---|
119 | if (stream != null) { |
---|
120 | logger.trace("using bundled schema for '{}'", schemaLocation); |
---|
121 | return stream; |
---|
122 | } |
---|
123 | logger.warn("unable to load bundled schema for '{}', " + |
---|
124 | "falling back to download.", schemaLocation); |
---|
125 | } |
---|
126 | |
---|
127 | // fall back to file cache ... |
---|
128 | final File cacheDataFile = |
---|
129 | makeFile(schemaLocation, EXTENSION_XSD); |
---|
130 | final File cacheErrorFile = |
---|
131 | makeFile(schemaLocation, EXTENSION_ERROR); |
---|
132 | |
---|
133 | for (;;) { |
---|
134 | boolean doDownload = false; |
---|
135 | |
---|
136 | synchronized (guard) { |
---|
137 | /* |
---|
138 | * check, if an earlier attempt to download the schema failed. |
---|
139 | */ |
---|
140 | if (cacheErrorFile.exists()) { |
---|
141 | if (isExpired(cacheErrorFile, maxNegativeCacheAge)) { |
---|
142 | logger.trace("-> error file '{}' expired", |
---|
143 | cacheErrorFile); |
---|
144 | cacheErrorFile.delete(); |
---|
145 | } else { |
---|
146 | throw new IOException("cached error condition detected"); |
---|
147 | } |
---|
148 | } |
---|
149 | |
---|
150 | if (cacheDataFile.exists()) { |
---|
151 | if (isExpired(cacheDataFile, maxCacheAge)) { |
---|
152 | logger.debug("cached entry for '{}' has expired", |
---|
153 | schemaLocation); |
---|
154 | cacheDataFile.delete(); |
---|
155 | } else { |
---|
156 | synchronized (pending) { |
---|
157 | if (!pending.contains(schemaLocation)) { |
---|
158 | logger.trace("-> '{}' from file cache", schemaLocation); |
---|
159 | return new FileInputStream(cacheDataFile); |
---|
160 | } |
---|
161 | } |
---|
162 | } |
---|
163 | } |
---|
164 | |
---|
165 | synchronized (pending) { |
---|
166 | if (!pending.contains(schemaLocation)) { |
---|
167 | doDownload = true; |
---|
168 | pending.add(schemaLocation); |
---|
169 | logger.trace("pending + '{}'", schemaLocation); |
---|
170 | } |
---|
171 | } // synchronized (pending) |
---|
172 | } // synchronized (guard) |
---|
173 | |
---|
174 | // either download in this thread of wait for pending download |
---|
175 | if (doDownload) { |
---|
176 | boolean failed = false; |
---|
177 | try { |
---|
178 | download(cacheDataFile, schemaLocation); |
---|
179 | logger.trace("downloaded schema from '{}' succesfully", schemaLocation); |
---|
180 | return new FileInputStream(cacheDataFile); |
---|
181 | } catch (IOException e) { |
---|
182 | logger.error("downloading schema from '{}' failed", schemaLocation); |
---|
183 | logger.error("cause:", e); |
---|
184 | failed = true; |
---|
185 | throw e; |
---|
186 | } finally { |
---|
187 | synchronized (guard) { |
---|
188 | if (failed) { |
---|
189 | if (cacheErrorFile.exists()) { |
---|
190 | cacheErrorFile.setLastModified( |
---|
191 | System.currentTimeMillis()); |
---|
192 | } else { |
---|
193 | cacheErrorFile.createNewFile(); |
---|
194 | } |
---|
195 | } |
---|
196 | synchronized (pending) { |
---|
197 | logger.trace("pending - '{}'", schemaLocation); |
---|
198 | pending.remove(schemaLocation); |
---|
199 | synchronized (waiter) { |
---|
200 | logger.trace("notify all waiters for downloading schema from '{}'", schemaLocation); |
---|
201 | waiter.notifyAll(); |
---|
202 | } // synchronized (waiter) |
---|
203 | }// synchronized (pending) |
---|
204 | } // synchronized (guard) |
---|
205 | } |
---|
206 | } else { |
---|
207 | try { |
---|
208 | synchronized (waiter) { |
---|
209 | logger.trace("waiting for download schema from '{}'", schemaLocation); |
---|
210 | waiter.wait(); |
---|
211 | } // synchronized (waiter) |
---|
212 | } catch (InterruptedException e) { |
---|
213 | throw new InterruptedIOException( |
---|
214 | "interrupted while waiting for download"); |
---|
215 | } |
---|
216 | } |
---|
217 | } // for |
---|
218 | } |
---|
219 | |
---|
220 | |
---|
221 | private void download(File cacheFile, String schemaLocation) |
---|
222 | throws IOException { |
---|
223 | try { |
---|
224 | logger.debug("downloading schema from '{}'", schemaLocation); |
---|
225 | final URI uri = new URI(schemaLocation); |
---|
226 | final HttpGet request = new HttpGet(uri); |
---|
227 | try { |
---|
228 | logger.trace("submitting HTTP request: {}", uri.toString()); |
---|
229 | final CloseableHttpResponse response = |
---|
230 | httpClient.execute(request, new BasicHttpContext()); |
---|
231 | try { |
---|
232 | final StatusLine status = response.getStatusLine(); |
---|
233 | if (status.getStatusCode() == HttpStatus.SC_OK) { |
---|
234 | final HttpEntity entity = response.getEntity(); |
---|
235 | if (entity == null) { |
---|
236 | throw new IOException( |
---|
237 | "request returned no message body"); |
---|
238 | } |
---|
239 | |
---|
240 | FileOutputStream out = null; |
---|
241 | try { |
---|
242 | out = new FileOutputStream(cacheFile); |
---|
243 | // use exclusive lock |
---|
244 | final FileLock lock = out.getChannel().lock(); |
---|
245 | try { |
---|
246 | entity.writeTo(out); |
---|
247 | out.flush(); |
---|
248 | out.getFD().sync(); |
---|
249 | } finally { |
---|
250 | lock.release(); |
---|
251 | } |
---|
252 | } finally { |
---|
253 | if (out != null) { |
---|
254 | out.close(); |
---|
255 | } |
---|
256 | } |
---|
257 | } else { |
---|
258 | switch (status.getStatusCode()) { |
---|
259 | case HttpStatus.SC_NOT_FOUND: |
---|
260 | throw new IOException("not found: " + uri); |
---|
261 | default: |
---|
262 | throw new IOException("unexpected status: " + |
---|
263 | status.getStatusCode()); |
---|
264 | } // switch |
---|
265 | } |
---|
266 | } catch (IOException e) { |
---|
267 | /* delete broken cache file */ |
---|
268 | if (cacheFile != null) { |
---|
269 | cacheFile.delete(); |
---|
270 | } |
---|
271 | throw e; |
---|
272 | } finally { |
---|
273 | /* make sure to release allocated resources */ |
---|
274 | response.close(); |
---|
275 | } |
---|
276 | } finally { |
---|
277 | request.reset(); |
---|
278 | } |
---|
279 | } catch (URISyntaxException e) { |
---|
280 | throw new IOException("schemaLocation uri is invalid: " + |
---|
281 | schemaLocation, e); |
---|
282 | } |
---|
283 | } |
---|
284 | |
---|
285 | |
---|
286 | private File makeFile(String schemaLocation, String extension) { |
---|
287 | final StringBuilder sb = new StringBuilder(); |
---|
288 | for (int i = 0; i < schemaLocation.length(); i++) { |
---|
289 | final char c = schemaLocation.charAt(i); |
---|
290 | switch (c) { |
---|
291 | case '.': |
---|
292 | /* FALL-THROUGH */ |
---|
293 | case ':': |
---|
294 | /* FALL-THROUGH */ |
---|
295 | case ';': |
---|
296 | /* FALL-THROUGH */ |
---|
297 | case '?': |
---|
298 | /* FALL-THROUGH */ |
---|
299 | case '&': |
---|
300 | /* FALL-THROUGH */ |
---|
301 | case '=': |
---|
302 | /* FALL-THROUGH */ |
---|
303 | case '"': |
---|
304 | /* FALL-THROUGH */ |
---|
305 | case '\'': |
---|
306 | /* FALL-THROUGH */ |
---|
307 | case '/': |
---|
308 | /* FALL-THROUGH */ |
---|
309 | case '\\': |
---|
310 | sb.append('_'); |
---|
311 | break; |
---|
312 | default: |
---|
313 | sb.append(c); |
---|
314 | } |
---|
315 | } // for |
---|
316 | sb.append(".").append(extension); |
---|
317 | return new File(cacheDirectory, sb.toString()); |
---|
318 | } |
---|
319 | |
---|
320 | |
---|
321 | private CloseableHttpClient createHttpClient(int connectTimeout, |
---|
322 | int socketTimeout) { |
---|
323 | final PoolingHttpClientConnectionManager manager = |
---|
324 | new PoolingHttpClientConnectionManager(); |
---|
325 | manager.setDefaultMaxPerRoute(8); |
---|
326 | manager.setMaxTotal(128); |
---|
327 | |
---|
328 | final SocketConfig socketConfig = SocketConfig.custom() |
---|
329 | .setSoReuseAddress(true) |
---|
330 | .setSoLinger(0) |
---|
331 | .build(); |
---|
332 | |
---|
333 | final RequestConfig requestConfig = RequestConfig.custom() |
---|
334 | .setAuthenticationEnabled(false) |
---|
335 | .setRedirectsEnabled(true) |
---|
336 | .setMaxRedirects(4) |
---|
337 | .setCircularRedirectsAllowed(false) |
---|
338 | .setCookieSpec(CookieSpecs.IGNORE_COOKIES) |
---|
339 | .setConnectTimeout(connectTimeout) |
---|
340 | .setSocketTimeout(socketTimeout) |
---|
341 | .setConnectionRequestTimeout(0) /* infinite */ |
---|
342 | .setStaleConnectionCheckEnabled(true) |
---|
343 | .build(); |
---|
344 | |
---|
345 | final ConnectionKeepAliveStrategy keepAliveStrategy = |
---|
346 | new ConnectionKeepAliveStrategy() { |
---|
347 | @Override |
---|
348 | public long getKeepAliveDuration(final HttpResponse response, |
---|
349 | final HttpContext context) { |
---|
350 | return 60000; |
---|
351 | } |
---|
352 | }; |
---|
353 | |
---|
354 | return HttpClients.custom() |
---|
355 | .setUserAgent(USER_AGENT) |
---|
356 | .setConnectionManager(manager) |
---|
357 | .setDefaultSocketConfig(socketConfig) |
---|
358 | .setDefaultRequestConfig(requestConfig) |
---|
359 | .setKeepAliveStrategy(keepAliveStrategy) |
---|
360 | .build(); |
---|
361 | } |
---|
362 | |
---|
363 | |
---|
364 | private static boolean isExpired(File file, long maxAge) { |
---|
365 | if (maxAge != DISABLE_CACHE_AGING) { |
---|
366 | return (System.currentTimeMillis() - file.lastModified()) >= maxAge; |
---|
367 | } else { |
---|
368 | return false; |
---|
369 | } |
---|
370 | } |
---|
371 | |
---|
372 | |
---|
373 | @Override |
---|
374 | protected void finalize() throws Throwable { |
---|
375 | httpClient.close(); |
---|
376 | } |
---|
377 | |
---|
378 | } // class CMDISchemaLoader |
---|