Changeset 5387 for CMDIValidator
- Timestamp:
- 06/25/14 14:51:13 (10 years ago)
- Location:
- CMDIValidator/trunk
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
CMDIValidator/trunk/cmdi-validator-core/pom.xml
r5384 r5387 40 40 <groupId>org.apache.httpcomponents</groupId> 41 41 <artifactId>httpclient</artifactId> 42 <version>4. 2.5</version>42 <version>4.3.4</version> 43 43 </dependency> 44 44 … … 111 111 </dependency> 112 112 113 <!-- 113 114 <dependency> 114 115 <groupId>org.apache.commons</groupId> … … 116 117 <version>3.2</version> 117 118 </dependency> 119 --> 118 120 </dependencies> 119 121 -
CMDIValidator/trunk/cmdi-validator-core/src/main/java/eu/clarin/cmdi/validator/CMDISchemaLoader.java
r5384 r5387 22 22 import java.io.IOException; 23 23 import java.io.InputStream; 24 import java.io.InterruptedIOException; 24 25 import java.net.URI; 25 26 import java.net.URISyntaxException; 27 import java.nio.channels.FileLock; 28 import java.util.HashSet; 29 import java.util.Set; 30 import java.util.concurrent.TimeUnit; 31 26 32 import javax.xml.XMLConstants; 27 33 … … 30 36 import org.apache.http.HttpStatus; 31 37 import org.apache.http.StatusLine; 32 import org.apache.http.client.HttpClient; 38 import org.apache.http.client.config.CookieSpecs; 39 import org.apache.http.client.config.RequestConfig; 40 import org.apache.http.client.methods.CloseableHttpResponse; 33 41 import org.apache.http.client.methods.HttpGet; 34 import org.apache.http.client.params.ClientPNames; 35 import org.apache.http.client.utils.HttpClientUtils; 36 import org.apache.http.impl.client.DefaultHttpClient; 37 import org.apache.http.params.CoreProtocolPNames; 38 import org.apache.http.params.HttpParams; 39 import org.apache.http.util.EntityUtils; 42 import org.apache.http.config.SocketConfig; 43 import org.apache.http.conn.ConnectionKeepAliveStrategy; 44 import org.apache.http.impl.client.CloseableHttpClient; 45 import org.apache.http.impl.client.HttpClients; 46 import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; 47 import org.apache.http.protocol.BasicHttpContext; 48 import org.apache.http.protocol.HttpContext; 40 49 import org.slf4j.Logger; 41 50 import org.slf4j.LoggerFactory; … … 45 54 private static final Logger logger = 46 55 LoggerFactory.getLogger(CMDISchemaLoader.class); 56 private static final String USER_AGENT = 57 "CMDI-Validator-SchemaLoader/" + Version.getVersion(); 47 58 private static final String XML_XSD_RESSOURCE = "/xml.xsd"; 59 private static final String EXTENSION_XSD = "xsd"; 60 private static final String EXTENSION_ERROR = "error"; 48 61 private final File cacheDirectory; 49 62 private final long maxCacheAge; 50 private final HttpClient httpClient; 51 52 53 public CMDISchemaLoader(File cacheDirectory, long maxCacheAge) { 63 private final long maxNegativeCacheAge; 64 private final CloseableHttpClient httpClient; 65 private final Set<String> pending = new HashSet<String>(128); 66 private final Object guard = new Object(); 67 private final Object waiter = new Object(); 68 69 70 public CMDISchemaLoader(File cacheDirectory, long maxCacheAge, 71 long maxNegativeCacheAge) { 54 72 if (cacheDirectory == null) { 55 73 throw new NullPointerException("cacheDirectory == null"); … … 58 76 throw new IllegalArgumentException("maxCacheAge < -1"); 59 77 } 60 this.cacheDirectory = cacheDirectory; 61 this.maxCacheAge = maxCacheAge; 62 this.httpClient = new DefaultHttpClient(); 63 64 final HttpParams params = this.httpClient.getParams(); 65 params.setParameter(CoreProtocolPNames.USER_AGENT, 66 this.getClass().getPackage().getName() + "/0.0.1"); 67 params.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, Boolean.TRUE); 68 params.setBooleanParameter(ClientPNames.ALLOW_CIRCULAR_REDIRECTS, true); 69 params.setIntParameter(ClientPNames.MAX_REDIRECTS, 16); 78 if (maxNegativeCacheAge < -1) { 79 throw new IllegalArgumentException("maxNegativeCacheAge < -1"); 80 } 81 this.cacheDirectory = cacheDirectory; 82 this.maxCacheAge = maxCacheAge; 83 this.maxNegativeCacheAge = maxNegativeCacheAge; 84 this.httpClient = createHttpClient(2500, 5000); 85 } 86 87 88 public CMDISchemaLoader(File cacheDirectory, long maxCacheAge) { 89 this(cacheDirectory, maxCacheAge, TimeUnit.HOURS.toMillis(1)); 70 90 } 71 91 … … 76 96 77 97 78 public synchronizedInputStream loadSchemaFile(String targetNamespace,98 public InputStream loadSchemaFile(String targetNamespace, 79 99 String schemaLocation) throws IOException { 80 100 if (targetNamespace == null) { … … 99 119 100 120 // fall back to file cache ... 101 final File cacheFile = makeCacheFile(schemaLocation); 102 if (cacheFile.exists()) { 103 final long age = 104 System.currentTimeMillis() - cacheFile.lastModified(); 105 if ((maxCacheAge != DISABLE_CACHE_AGING) && (age > maxCacheAge)) { 106 logger.trace("-> cached file '{}' expired", cacheFile); 107 cacheFile.delete(); 121 final File cacheDataFile = 122 makeFile(schemaLocation, EXTENSION_XSD); 123 final File cacheErrorFile = 124 makeFile(schemaLocation, EXTENSION_ERROR); 125 126 for (;;) { 127 boolean doDownload = false; 128 129 synchronized (guard) { 130 /* 131 * check, if an earlier attempt to download the schema failed. 132 */ 133 if (cacheErrorFile.exists()) { 134 if (isExpired(cacheErrorFile, maxNegativeCacheAge)) { 135 logger.trace("-> error file '{}' expired", 136 cacheErrorFile); 137 cacheErrorFile.delete(); 138 } else { 139 throw new IOException("cached error condition detected"); 140 } 141 } 142 143 if (cacheDataFile.exists()) { 144 if (isExpired(cacheDataFile, maxCacheAge)) { 145 logger.debug("cached entry for '{}' has expired", 146 schemaLocation); 147 cacheDataFile.delete(); 148 } else { 149 logger.trace("-> from file cache"); 150 return new FileInputStream(cacheDataFile); 151 } 152 } 153 154 synchronized (pending) { 155 if (!pending.contains(schemaLocation)) { 156 doDownload = true; 157 pending.add(schemaLocation); 158 } 159 } // synchronized (pending) 160 } // synchronized (guard) 161 162 // either download in this thread of wait for pending download 163 if (doDownload) { 164 boolean failed = false; 165 try { 166 download(cacheDataFile, schemaLocation); 167 return new FileInputStream(cacheDataFile); 168 } catch (IOException e) { 169 failed = true; 170 throw e; 171 } finally { 172 synchronized (guard) { 173 if (failed) { 174 if (cacheErrorFile.exists()) { 175 cacheErrorFile.setLastModified( 176 System.currentTimeMillis()); 177 } else { 178 cacheErrorFile.createNewFile(); 179 } 180 } 181 synchronized (pending) { 182 pending.remove(schemaLocation); 183 synchronized (waiter) { 184 waiter.notifyAll(); 185 } // synchronized (waiter) 186 }// synchronized (pending) 187 } // synchronized (guard) 188 } 108 189 } else { 109 logger.trace("-> from file cache"); 110 return new FileInputStream(cacheFile); 111 } 112 } 113 190 try { 191 synchronized (waiter) { 192 waiter.wait(); 193 } // synchronized (waiter) 194 } catch (InterruptedException e) { 195 throw new InterruptedIOException( 196 "interrupted while waiting for download"); 197 } 198 } 199 } // for 200 } 201 202 203 private void download(File cacheFile, String schemaLocation) 204 throws IOException { 114 205 try { 115 206 logger.debug("downloading schema from '{}'", schemaLocation); 116 207 final URI uri = new URI(schemaLocation); 117 final HttpResponse response = executeRequest(uri); 118 final HttpEntity entity = response.getEntity(); 119 if (entity == null) { 120 throw new IOException("the request returned no message body"); 121 } 122 208 final HttpGet request = new HttpGet(uri); 123 209 try { 124 final InputStream in = entity.getContent(); 125 126 final FileOutputStream out = 127 new FileOutputStream(cacheFile); 128 int read; 129 final byte[] buffer = new byte[4096]; 130 while ((read = in.read(buffer)) != -1) { 131 out.write(buffer, 0, read); 132 } 133 out.close(); 134 135 return new FileInputStream(cacheFile); 136 } catch (IllegalStateException e) { 137 throw new IOException("error reading response", e); 138 } catch (IOException e) { 139 /* delete broken cache file */ 140 if (cacheFile != null) { 141 cacheFile.delete(); 142 } 143 throw e; 210 logger.trace("submitting HTTP request: {}", uri.toString()); 211 final CloseableHttpResponse response = 212 httpClient.execute(request, new BasicHttpContext()); 213 try { 214 final StatusLine status = response.getStatusLine(); 215 if (status.getStatusCode() == HttpStatus.SC_OK) { 216 final HttpEntity entity = response.getEntity(); 217 if (entity == null) { 218 throw new IOException( 219 "request returned no message body"); 220 } 221 222 FileOutputStream out = null; 223 try { 224 out = new FileOutputStream(cacheFile); 225 // use exclusive lock 226 final FileLock lock = out.getChannel().lock(); 227 try { 228 entity.writeTo(out); 229 out.flush(); 230 out.getFD().sync(); 231 } finally { 232 lock.release(); 233 } 234 } finally { 235 if (out != null) { 236 out.close(); 237 } 238 } 239 } else { 240 switch (status.getStatusCode()) { 241 case HttpStatus.SC_NOT_FOUND: 242 throw new IOException("not found: " + uri); 243 default: 244 throw new IOException("unexpected status: " + 245 status.getStatusCode()); 246 } // switch 247 } 248 } catch (IOException e) { 249 /* delete broken cache file */ 250 if (cacheFile != null) { 251 cacheFile.delete(); 252 } 253 throw e; 254 } finally { 255 /* make sure to release allocated resources */ 256 response.close(); 257 } 144 258 } finally { 145 /* make sure to release allocated resources */ 146 HttpClientUtils.closeQuietly(response); 259 request.reset(); 147 260 } 148 261 } catch (URISyntaxException e) { … … 153 266 154 267 155 private File make CacheFile(String schemaLocation) {268 private File makeFile(String schemaLocation, String extension) { 156 269 final StringBuilder sb = new StringBuilder(); 157 270 for (int i = 0; i < schemaLocation.length(); i++) { … … 183 296 } 184 297 } // for 185 sb.append(". xsd");298 sb.append(".").append(extension); 186 299 return new File(cacheDirectory, sb.toString()); 187 300 } 188 301 189 302 190 private HttpResponse executeRequest(URI uri) throws IOException { 191 HttpGet request = null; 192 HttpResponse response = null; 193 try { 194 logger.trace("submitting HTTP request: {}", uri.toString()); 195 request = new HttpGet(uri); 196 response = httpClient.execute(request); 197 StatusLine status = response.getStatusLine(); 198 if (status.getStatusCode() != HttpStatus.SC_OK) { 199 if (status.getStatusCode() == HttpStatus.SC_NOT_FOUND) { 200 throw new IOException("not found: " + uri); 201 } else { 202 throw new IOException("unexpected status: " + 203 status.getStatusCode()); 204 } 205 } 206 return response; 207 } catch (IOException e) { 208 /* 209 * if an error occurred, make sure we are freeing up the resources 210 * we've used 211 */ 212 if (response != null) { 213 try { 214 EntityUtils.consume(response.getEntity()); 215 } catch (IOException ex) { 216 /* IGNORE */ 217 } 218 219 /* make sure to release allocated resources */ 220 HttpClientUtils.closeQuietly(response); 221 } 222 if (request != null) { 223 request.abort(); 224 } 225 throw e; 226 } 303 private CloseableHttpClient createHttpClient(int connectTimeout, 304 int socketTimeout) { 305 final PoolingHttpClientConnectionManager manager = 306 new PoolingHttpClientConnectionManager(); 307 manager.setDefaultMaxPerRoute(8); 308 manager.setMaxTotal(128); 309 310 final SocketConfig socketConfig = SocketConfig.custom() 311 .setSoReuseAddress(true) 312 .setSoLinger(0) 313 .build(); 314 315 final RequestConfig requestConfig = RequestConfig.custom() 316 .setAuthenticationEnabled(false) 317 .setRedirectsEnabled(true) 318 .setMaxRedirects(4) 319 .setCircularRedirectsAllowed(false) 320 .setCookieSpec(CookieSpecs.IGNORE_COOKIES) 321 .setConnectTimeout(connectTimeout) 322 .setSocketTimeout(socketTimeout) 323 .setConnectionRequestTimeout(0) /* infinite */ 324 .setStaleConnectionCheckEnabled(false) 325 .build(); 326 327 final ConnectionKeepAliveStrategy keepAliveStrategy = 328 new ConnectionKeepAliveStrategy() { 329 @Override 330 public long getKeepAliveDuration(final HttpResponse response, 331 final HttpContext context) { 332 return 60000; 333 } 334 }; 335 336 return HttpClients.custom() 337 .setUserAgent(USER_AGENT) 338 .setConnectionManager(manager) 339 .setDefaultSocketConfig(socketConfig) 340 .setDefaultRequestConfig(requestConfig) 341 .setKeepAliveStrategy(keepAliveStrategy) 342 .build(); 343 } 344 345 346 private static boolean isExpired(File file, long maxAge) { 347 if (maxAge != DISABLE_CACHE_AGING) { 348 return (System.currentTimeMillis() - file.lastModified()) >= maxAge; 349 } else { 350 return false; 351 } 352 } 353 354 355 @Override 356 protected void finalize() throws Throwable { 357 httpClient.close(); 227 358 } 228 359 -
CMDIValidator/trunk/cmdi-validator-core/src/main/java/eu/clarin/cmdi/validator/extensions/CheckHandlesExtension.java
r5384 r5387 17 17 package eu.clarin.cmdi.validator.extensions; 18 18 19 import java.io.IOException; 19 20 import java.net.URI; 20 21 import java.net.URISyntaxException; … … 46 47 private static final Logger logger = 47 48 LoggerFactory.getLogger(CheckHandlesExtension.class); 48 private final int threads;49 49 private final boolean resolveHandles; 50 50 private HandleResolver resolver = null; … … 52 52 53 53 54 public CheckHandlesExtension(int threads, boolean resolveHandles) { 55 if (threads < 1) { 56 throw new IllegalArgumentException("threads < 1"); 57 } 58 this.threads = threads; 54 public CheckHandlesExtension(boolean resolveHandles) { 59 55 this.resolveHandles = resolveHandles; 60 56 } 61 57 62 58 63 public CheckHandlesExtension(boolean resolveHandles) { 64 this(Runtime.getRuntime().availableProcessors(), resolveHandles); 59 public boolean isResolvingHandles() { 60 return resolveHandles; 61 } 62 63 64 public HandleResolver.Statistics getStatistics() { 65 return (resolver != null) ? resolver.getStatistics() : null; 65 66 } 66 67 … … 69 70 protected void doInitialize() throws CMDIValidatorInitException { 70 71 if (resolveHandles) { 71 this.resolver = new HandleResolver( threads);72 this.resolver = new HandleResolver(); 72 73 } 73 74 … … 89 90 selector.setContextItem(document); 90 91 for (XdmItem item : selector) { 91 String handle = item.getStringValue(); 92 if (handle != null) { 93 handle = handle.trim(); 92 String handle = null; 93 final String h = item.getStringValue(); 94 if (h != null) { 95 handle = h.trim(); 94 96 if (handle.isEmpty()) { 95 97 handle = null; 98 } else { 99 if (!handle.equals(h)) { 100 result.reportWarning(-1, -1, "handle '" + h + 101 "' contains leading or tailing spaces " + 102 "within <ResourceRef> element"); 103 } 96 104 } 97 105 } … … 124 132 checkHandleResolves(actionableURI, result); 125 133 } catch (URISyntaxException e) { 134 /* should not happen */ 126 135 throw new CMDIValidatorException( 127 136 "created an invalid URI", e); … … 137 146 } else if (HDL_PROXY_HTTP.equalsIgnoreCase(uri.getScheme()) || 138 147 HDL_PROXY_HTTPS.equalsIgnoreCase(uri.getScheme())) { 139 if (HDL_PROXY_HOST.equalsIgnoreCase(uri.getHost())) { 148 if (uri.getHost() != null) { 149 if (!HDL_PROXY_HOST.equalsIgnoreCase(uri.getHost())) { 150 result.reportError(-1, -1, 151 "The URI of PID '" + handle + 152 "' contains an unexpected host part of '" + 153 uri.getHost() + "'"); 154 } 140 155 checkHandleResolves(uri, result); 141 156 } else { 157 result.reportError(-1, -1, "The URI of PID '" + handle + 158 "' is missing the host part"); 159 } 160 } else { 161 if (uri.getScheme() != null) { 142 162 result.reportError(-1, -1, 143 "PID '" + handle + 144 "' contains an unexpected host part in the URI: " + 145 uri.getHost()); 146 } 147 } else { 148 result.reportError(-1, -1, 149 "PID '" + handle + 150 "' contains an unexpected schema part in the URI: " + 151 uri.getScheme()); 163 "The URI of PID '" + handle + 164 "' contains an unexpected schema part of '" + 165 uri.getScheme() + "'"); 166 } else { 167 result.reportError(-1, -1, "The URI of PID '" + handle + 168 "' is missing a proper schema part"); 169 } 152 170 } 153 171 } catch (URISyntaxException e) { … … 161 179 CMDIValidatorWriteableResult result) throws CMDIValidatorException { 162 180 if (resolver != null) { 163 int code = resolver.resolve(uri); 164 switch (code) { 165 case HttpStatus.SC_OK: 166 /* no special message in this case */ 167 break; 168 case HttpStatus.SC_UNAUTHORIZED: 169 /* FALL-THROUGH */ 170 case HttpStatus.SC_FORBIDDEN: 171 result.reportInfo(-1, -1, "PID '" + uri + 172 "' resolved to an access protected resource (" + code + 173 ")"); 174 break; 175 case HttpStatus.SC_NOT_FOUND: 176 result.reportError(-1, -1, "PID '" + uri + 177 "' resolved to an non-existing resource (" + code + ")"); 178 break; 179 default: 180 result.reportError(-1, -1, "PID '" + uri + 181 "' resolved with an unexpected result (" + code + ")"); 182 break; 181 try { 182 int code = resolver.resolve(uri); 183 switch (code) { 184 case HttpStatus.SC_OK: 185 /* no special message in this case */ 186 break; 187 case HttpStatus.SC_UNAUTHORIZED: 188 /* FALL-THROUGH */ 189 case HttpStatus.SC_FORBIDDEN: 190 result.reportInfo(-1, -1, "PID '" + uri + 191 "' resolved to an access protected resource (" + 192 code + ")"); 193 break; 194 case HttpStatus.SC_NOT_FOUND: 195 result.reportError(-1, -1, "PID '" + uri + 196 "' resolved to an non-existing resource (" + code + 197 ")"); 198 break; 199 case HandleResolver.TIMEOUT: 200 result.reportWarning(-1, -1, "Timeout while resolving PID '" + 201 uri + "'"); 202 break; 203 case HandleResolver.UNKNOWN_HOST: 204 result.reportWarning(-1, -1, "Unable to resolve host '" + 205 uri.getHost() + "' while resolving PID '" + 206 uri + "'"); 207 break; 208 case HandleResolver.ERROR: 209 result.reportWarning(-1, -1, 210 "An error occurred while resolving PID '" + 211 uri + "'"); 212 break; 213 default: 214 result.reportWarning(-1, -1, "PID '" + uri + 215 "' resolved with an unexpected result (" + code + 216 ")"); 217 break; 218 } // switch 219 } catch (IOException e) { 220 throw new CMDIValidatorException( 221 "error while resolving handle '" + uri + "'", e); 183 222 } 184 223 } -
CMDIValidator/trunk/cmdi-validator-core/src/main/java/eu/clarin/cmdi/validator/utils/HandleResolver.java
r5384 r5387 18 18 19 19 import java.io.IOException; 20 import java.io.InterruptedIOException; 21 import java.net.SocketTimeoutException; 20 22 import java.net.URI; 23 import java.net.UnknownHostException; 21 24 import java.util.HashSet; 22 25 import java.util.Set; 26 import java.util.concurrent.Semaphore; 27 import java.util.concurrent.atomic.AtomicInteger; 28 import java.util.concurrent.atomic.AtomicLong; 23 29 24 30 import org.apache.http.HttpResponse; 25 31 import org.apache.http.StatusLine; 26 import org.apache.http.client.HttpClient; 32 import org.apache.http.client.config.CookieSpecs; 33 import org.apache.http.client.config.RequestConfig; 34 import org.apache.http.client.methods.CloseableHttpResponse; 27 35 import org.apache.http.client.methods.HttpHead; 28 import org.apache.http.client.params.ClientPNames; 29 import org.apache.http.client.utils.HttpClientUtils; 30 import org.apache.http.impl.client.DefaultHttpClient; 31 import org.apache.http.params.CoreProtocolPNames; 32 import org.apache.http.params.HttpParams; 36 import org.apache.http.config.ConnectionConfig; 37 import org.apache.http.config.SocketConfig; 38 import org.apache.http.conn.ConnectTimeoutException; 39 import org.apache.http.conn.ConnectionKeepAliveStrategy; 40 import org.apache.http.impl.client.CloseableHttpClient; 41 import org.apache.http.impl.client.HttpClients; 42 import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; 43 import org.apache.http.protocol.BasicHttpContext; 44 import org.apache.http.protocol.HttpContext; 33 45 import org.slf4j.Logger; 34 46 import org.slf4j.LoggerFactory; 35 47 36 import eu.clarin.cmdi.validator. CMDIValidatorException;48 import eu.clarin.cmdi.validator.Version; 37 49 38 50 public class HandleResolver { 51 public static final class Statistics { 52 private final long cacheHitCount; 53 private final long cacheMissCount; 54 private final long timeoutCount; 55 private final long unknownHostCount; 56 private final long errorCount; 57 private final long totalRequestsCount; 58 private final int currentRequestsCount; 59 private final int currentCacheSize; 60 61 private Statistics(long cacheHitCount, 62 long cacheMissCount, 63 long timeoutCount, 64 long unknownHostCount, 65 long errorCount, 66 long totalRequestsCount, 67 int currentRequestsCount, 68 int currentCacheSize) { 69 this.cacheHitCount = cacheHitCount; 70 this.cacheMissCount = cacheMissCount; 71 this.timeoutCount = timeoutCount; 72 this.unknownHostCount = unknownHostCount; 73 this.errorCount = errorCount; 74 this.totalRequestsCount = totalRequestsCount; 75 this.currentRequestsCount = currentRequestsCount; 76 this.currentCacheSize = currentCacheSize; 77 } 78 79 80 public long getCacheHitCount() { 81 return cacheHitCount; 82 } 83 84 85 public long getCacheMissCount() { 86 return cacheMissCount; 87 } 88 89 90 public long getTimeoutCount() { 91 return timeoutCount; 92 } 93 94 95 public long getUnknownHostCount() { 96 return unknownHostCount; 97 } 98 99 100 public long getErrorCount() { 101 return errorCount; 102 } 103 104 105 public long getTotalRequestsCount() { 106 return totalRequestsCount; 107 } 108 109 110 public int getCurrentRequestsCount() { 111 return currentRequestsCount; 112 } 113 114 public int getCurrentCacheSize() { 115 return currentCacheSize; 116 } 117 } 39 118 private static final Logger logger = 40 119 LoggerFactory.getLogger(HandleResolver.class); 41 public final int ERROR = -1; 120 public static final int TIMEOUT = -1; 121 public static final int UNKNOWN_HOST = -2; 122 public static final int ERROR = -3; 123 private static final String USER_AGENT = 124 "CMDI-Validator-HandleResolver/" + Version.getVersion(); 125 private static final int DEFAULT_MAX_CONCURRENT_REQUESTS = 8; 126 private static final int DEFAULT_CONNECT_TIMEOUT = 5000; 127 private static final int DEFAULT_SOCKET_TIMEOUT = 10000; 42 128 private final LRUCache<URI, Integer> cache = 43 129 new LRUCache<URI, Integer>(16 * 1024); 44 130 private final Set<URI> pending; 131 private final int maxConcurrentRequestsCount; 132 private final Semaphore maxConcurrentRequests; 133 private final CloseableHttpClient client; 134 private long cacheHitCount = 0; 135 private long cacheMissCount = 0; 136 private AtomicLong timeoutCount = new AtomicLong(); 137 private AtomicLong unknownHostCount = new AtomicLong(); 138 private AtomicLong errorCount = new AtomicLong(); 139 private AtomicLong totalRequestsCount = new AtomicLong(); 140 private AtomicInteger currentRequestCount = new AtomicInteger(); 45 141 private final Object waiter = new Object(); 46 private long cacheHits = 0; 47 private long cacheMisses = 0; 48 49 50 public HandleResolver(final int threads) {51 this.pending = new HashSet<URI>(threads * 2);52 }53 54 55 public long getCacheHits() {56 return cacheHits;57 } 58 59 60 public long getCacheMisses() {61 return cacheMisses;62 } 63 64 65 public int resolve(final URI handle) throws CMDIValidatorException {142 143 144 public HandleResolver(int maxConcurrentRequests) { 145 if (maxConcurrentRequests < 1) { 146 throw new IllegalArgumentException("maxConcurrentRequests < 1"); 147 } 148 this.pending = new HashSet<URI>(maxConcurrentRequests * 4); 149 this.client = createHttpClient(DEFAULT_CONNECT_TIMEOUT, 150 DEFAULT_SOCKET_TIMEOUT); 151 this.maxConcurrentRequestsCount = maxConcurrentRequests; 152 this.maxConcurrentRequests = new Semaphore(maxConcurrentRequests, true); 153 } 154 155 156 public HandleResolver() { 157 this(DEFAULT_MAX_CONCURRENT_REQUESTS); 158 } 159 160 161 public int resolve(final URI handle) throws IOException { 66 162 if (handle == null) { 67 163 throw new NullPointerException("handle == null"); 68 164 } 69 165 logger.debug("resolving '{}'", handle); 166 totalRequestsCount.incrementAndGet(); 70 167 for (;;) { 71 168 boolean doResolve = false; … … 75 172 logger.trace("got cached result for '{}': {}", 76 173 handle, cached); 77 cacheHit s++;174 cacheHitCount++; 78 175 return cached.intValue(); 79 176 } 177 cacheMissCount++; 80 178 81 179 synchronized (pending) { 82 180 if (!pending.contains(handle)) { 83 cacheMisses++;84 181 doResolve = true; 85 182 pending.add(handle); … … 92 189 int result = ERROR; 93 190 try { 94 final HttpClient httpClient = newHttpClient(); 95 try { 96 result = doResolve(handle, httpClient); 97 } finally { 98 HttpClientUtils.closeQuietly(httpClient); 99 } 100 } catch (Throwable e) { 101 throw new CMDIValidatorException( 102 "error while resolving handle '" + handle + "'", e); 191 result = doResolve(handle); 103 192 } finally { 104 193 // cache result and notify other threads … … 114 203 } // synchronized (pending) 115 204 } // synchronized (cache) 205 if (result == ERROR) { 206 errorCount.incrementAndGet(); 207 } 116 208 } 117 209 return result; … … 122 214 } // synchronized (waiter) 123 215 } catch (InterruptedException e) { 216 errorCount.incrementAndGet(); 124 217 return ERROR; 125 218 } … … 129 222 130 223 131 private int doResolve(final URI handle, final HttpClient httpClient) 132 throws CMDIValidatorException { 224 public Statistics getStatistics() { 225 synchronized (cache) { 226 return new Statistics(cacheHitCount, 227 cacheMissCount, 228 timeoutCount.get(), 229 unknownHostCount.get(), 230 errorCount.get(), 231 totalRequestsCount.get(), 232 currentRequestCount.get(), 233 cache.size()); 234 } // synchronized (cache) 235 } 236 237 238 public void clear() { 239 try { 240 try { 241 // acquire all permits to deny any requests from happening ... 242 maxConcurrentRequests.acquire(maxConcurrentRequestsCount); 243 244 // clear data 245 synchronized (cache) { 246 cache.clear(); 247 cacheHitCount = 0; 248 cacheMissCount = 0; 249 timeoutCount.set(0); 250 unknownHostCount.set(0); 251 errorCount.set(0); 252 totalRequestsCount.set(0); 253 } // synchronized (cache) 254 } finally { 255 maxConcurrentRequests.release(); 256 } 257 } catch (InterruptedException e) { 258 /* IGNORE */ 259 } 260 } 261 262 263 private int doResolve(final URI handle) throws IOException { 133 264 logger.trace("performing HTTP request for '{}'", handle); 134 HttpHead request = null; 135 HttpResponse response = null; 265 136 266 try { 137 request = new HttpHead(handle); 138 response = httpClient.execute(request); 139 140 final StatusLine status = response.getStatusLine(); 141 return status.getStatusCode(); 142 } catch (IOException e) { 143 if (request != null) { 144 request.abort(); 267 maxConcurrentRequests.acquire(); 268 } catch (InterruptedException e) { 269 throw new InterruptedIOException(); 270 } 271 272 currentRequestCount.incrementAndGet(); 273 final HttpHead request = new HttpHead(handle); 274 try { 275 final CloseableHttpResponse response = 276 client.execute(request, new BasicHttpContext()); 277 try { 278 final StatusLine status = response.getStatusLine(); 279 return status.getStatusCode(); 280 } finally { 281 response.close(); 145 282 } 146 throw new CMDIValidatorException("error resolving handle '" + 147 handle + "'", e); 283 } catch (ConnectTimeoutException e) { 284 timeoutCount.incrementAndGet(); 285 return TIMEOUT; 286 } catch (SocketTimeoutException e) { 287 timeoutCount.incrementAndGet(); 288 return TIMEOUT; 289 } catch (UnknownHostException e) { 290 unknownHostCount.incrementAndGet(); 291 return UNKNOWN_HOST; 148 292 } finally { 149 /* make sure to release allocated resources */ 150 HttpClientUtils.closeQuietly(response); 151 } 152 } 153 154 155 private HttpClient newHttpClient() { 156 final HttpClient client = new DefaultHttpClient(); 157 final HttpParams params = client.getParams(); 158 params.setParameter(CoreProtocolPNames.USER_AGENT, 159 getClass().getName() + "/1.0"); 160 params.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, Boolean.TRUE); 161 params.setBooleanParameter(ClientPNames.ALLOW_CIRCULAR_REDIRECTS, true); 162 params.setIntParameter(ClientPNames.MAX_REDIRECTS, 16); 163 return client; 293 request.reset(); 294 currentRequestCount.decrementAndGet(); 295 maxConcurrentRequests.release(); 296 } 297 } 298 299 300 private CloseableHttpClient createHttpClient(int connectTimeout, 301 int socketTimeout) { 302 final PoolingHttpClientConnectionManager manager = 303 new PoolingHttpClientConnectionManager(); 304 manager.setDefaultMaxPerRoute(8); 305 manager.setMaxTotal(128); 306 307 final SocketConfig socketConfig = SocketConfig.custom() 308 .setSoReuseAddress(true) 309 .setSoLinger(0) 310 .build(); 311 312 final ConnectionConfig connectionConfig = ConnectionConfig.custom() 313 .setBufferSize(1024) 314 .build(); 315 316 final RequestConfig requestConfig = RequestConfig.custom() 317 .setAuthenticationEnabled(false) 318 .setRedirectsEnabled(true) 319 .setMaxRedirects(4) 320 .setCircularRedirectsAllowed(false) 321 .setCookieSpec(CookieSpecs.IGNORE_COOKIES) 322 .setConnectTimeout(connectTimeout) 323 .setSocketTimeout(socketTimeout) 324 .setConnectionRequestTimeout(0) /* infinite */ 325 .setStaleConnectionCheckEnabled(false) 326 .build(); 327 328 final ConnectionKeepAliveStrategy keepAliveStrategy = 329 new ConnectionKeepAliveStrategy() { 330 @Override 331 public long getKeepAliveDuration(final HttpResponse response, 332 final HttpContext context) { 333 return 15000; 334 } 335 }; 336 337 return HttpClients.custom() 338 .setUserAgent(USER_AGENT) 339 .setConnectionManager(manager) 340 .setDefaultSocketConfig(socketConfig) 341 .setDefaultConnectionConfig(connectionConfig) 342 .setDefaultRequestConfig(requestConfig) 343 .setKeepAliveStrategy(keepAliveStrategy) 344 .build(); 345 } 346 347 348 @Override 349 protected void finalize() throws Throwable { 350 client.close(); 164 351 } 165 352 -
CMDIValidator/trunk/cmdi-validator-tool/pom.xml
r5384 r5387 42 42 <artifactId>slf4j-log4j12</artifactId> 43 43 </dependency> 44 45 <dependency> 46 <groupId>org.slf4j</groupId> 47 <artifactId>jcl-over-slf4j</artifactId> 48 <scope>runtime</scope> 49 </dependency> 50 44 51 45 52 <!-- project specific dependencies --> -
CMDIValidator/trunk/cmdi-validator-tool/src/main/java/eu/clarin/cmdi/validator/tool/CMDIValidatorTool.java
r5384 r5387 23 23 import java.util.Locale; 24 24 import java.util.concurrent.TimeUnit; 25 25 26 import net.java.truevfs.access.TFile; 26 27 import net.java.truevfs.access.TVFS; … … 48 49 import eu.clarin.cmdi.validator.CMDIValidatorResult.Severity; 49 50 import eu.clarin.cmdi.validator.extensions.CheckHandlesExtension; 51 import eu.clarin.cmdi.validator.utils.HandleResolver; 50 52 51 53 52 54 public class CMDIValidatorTool { 53 private static final String PRG_NAME = "cmdi-validator";54 private static final long DEFAULT_PROGRESS_INTERVAL = 15000;55 private static final Locale LOCALE = Locale.ENGLISH;56 private static final char OPT_DEBUG = 'd';57 private static final char OPT_QUIET = 'q';58 private static final char OPT_VERBOSE = 'v';59 private static final char OPT_ NO_PROGRESS = 'P';60 private static final char OPT_ THREAD_COUNT = 't';61 private static final char OPT_NO_ THREADS = 'T';62 private static final char OPT_ NO_ESTIMATE = 'E';63 private static final char OPT_ SCHEMA_CACHE_DIR = 'c';64 private static final char OPT_ NO_SCHEMATRON = 'S';65 private static final char OPT_ SCHEMATRON_FILE = 's';66 private static final char OPT_CHECK_ PIDS = 'p';55 private static final String PRG_NAME = "cmdi-validator"; 56 private static final long DEFAULT_PROGRESS_INTERVAL = 15000; 57 private static final Locale LOCALE = Locale.ENGLISH; 58 private static final char OPT_DEBUG = 'd'; 59 private static final char OPT_QUIET = 'q'; 60 private static final char OPT_VERBOSE = 'v'; 61 private static final char OPT_THREAD_COUNT = 't'; 62 private static final char OPT_NO_THREADS = 'T'; 63 private static final char OPT_NO_ESTIMATE = 'E'; 64 private static final char OPT_SCHEMA_CACHE_DIR = 'c'; 65 private static final char OPT_NO_SCHEMATRON = 'S'; 66 private static final char OPT_SCHEMATRON_FILE = 's'; 67 private static final char OPT_CHECK_PIDS = 'p'; 68 private static final char OPT_CHECK_AND_RESOLVE_PIDS = 'P'; 67 69 private static final Logger logger = 68 70 LoggerFactory.getLogger(CMDIValidatorTool.class); … … 84 86 File schematronFile = null; 85 87 boolean checkPids = false; 88 boolean checkAndResolvePids = false; 86 89 87 90 /* … … 103 106 } 104 107 if (line.hasOption(OPT_NO_SCHEMATRON) && line.hasOption(OPT_SCHEMATRON_FILE)) { 105 throw new ParseException("The -s and -T options are mutually exclusive"); 106 } 108 throw new ParseException("The -s and -S options are mutually exclusive"); 109 } 110 if (line.hasOption(OPT_CHECK_PIDS) && line.hasOption(OPT_CHECK_AND_RESOLVE_PIDS)) { 111 throw new ParseException("The -p and -P options are mutually exclusive"); 112 } 113 107 114 // extract options 108 115 if (line.hasOption(OPT_DEBUG)) { … … 115 122 verbose = true; 116 123 } 117 if ( line.hasOption(OPT_NO_PROGRESS) ||quiet) {124 if (quiet) { 118 125 progressInterval = -1; 119 126 } … … 157 164 if (line.hasOption(OPT_CHECK_PIDS)) { 158 165 checkPids = true; 166 } 167 if (line.hasOption(OPT_CHECK_AND_RESOLVE_PIDS)) { 168 checkAndResolvePids = true; 159 169 } 160 170 … … 224 234 builder.disableSchematron(); 225 235 } 226 if (checkPids) { 227 logger.info("performing PID checking"); 228 builder.extension( 229 new CheckHandlesExtension(threadCount, true)); 236 237 CheckHandlesExtension checkHandleExtension = null; 238 if (checkPids || checkAndResolvePids) { 239 if (checkAndResolvePids) { 240 logger.info("enabling PID validation (syntax and resolving)"); 241 } else { 242 logger.info("enabling PID validation (syntax only)"); 243 } 244 checkHandleExtension = 245 new CheckHandlesExtension(checkAndResolvePids); 246 builder.extension(checkHandleExtension); 230 247 } 231 248 … … 279 296 ((bps != -1) ? Humanize.binaryPrefix(bps, LOCALE) : "N/A MB")); 280 297 } 298 if (logger.isDebugEnabled()) { 299 if ((checkHandleExtension != null) && 300 checkHandleExtension.isResolvingHandles()) { 301 final HandleResolver.Statistics stats = 302 checkHandleExtension.getStatistics(); 303 logger.debug("[handle resolver stats] total requests: {}, running requests: {}, cache hits: {}, cache misses: {}, current cache size: {}", 304 stats.getTotalRequestsCount(), 305 stats.getCurrentRequestsCount(), 306 stats.getCacheHitCount(), 307 stats.getCacheMissCount(), 308 stats.getCurrentCacheSize()); 309 } 310 } 281 311 } 282 312 } // for (;;) … … 360 390 .withLongOpt("verbose") 361 391 .create(OPT_VERBOSE)); 362 options.addOption(OptionBuilder363 .withDescription("no progress reporting")364 .withLongOpt("no-progress")365 .create(OPT_NO_PROGRESS));366 392 OptionGroup g2 = new OptionGroup(); 367 393 g2.addOption(OptionBuilder … … 398 424 .create(OPT_SCHEMATRON_FILE)); 399 425 options.addOptionGroup(g3); 400 options.addOption(OptionBuilder 401 .withDescription("check if persistent identifiers resolve correctly") 426 OptionGroup g4 = new OptionGroup(); 427 g4.addOption(OptionBuilder 428 .withDescription("check persistent identifiers syntax") 402 429 .withLongOpt("check-pids") 403 430 .create(OPT_CHECK_PIDS)); 431 g4.addOption(OptionBuilder 432 .withDescription("check persistent identifiers syntax and if they resolve properly") 433 .withLongOpt("check-and-resolve-pids") 434 .create(OPT_CHECK_AND_RESOLVE_PIDS)); 435 options.addOptionGroup(g4); 404 436 return options; 405 437 } -
CMDIValidator/trunk/pom.xml
r5384 r5387 50 50 <version>${slf4j.version}</version> 51 51 </dependency> 52 53 <dependency> 54 <groupId>org.slf4j</groupId> 55 <artifactId>jcl-over-slf4j</artifactId> 56 <version>${slf4j.version}</version> 57 </dependency> 52 58 </dependencies> 53 59 </dependencyManagement>
Note: See TracChangeset
for help on using the changeset viewer.