Changeset 5458 for OAIHarvester
- Timestamp: 07/08/14 11:50:40 (10 years ago)
- Location: OAIHarvester/trunk/OAIHarvester
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
OAIHarvester/trunk/OAIHarvester/pom.xml
r3026 r5458 1 < project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"2 3 1 <?xml version="1.0" encoding="UTF-8"?><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 2 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 3 <modelVersion>4.0.0</modelVersion> 4 4 5 6 7 <version>0.2.0-SNAPSHOT</version>8 9 5 <groupId>eu.clarin.cmdi</groupId> 6 <artifactId>OAIHarvester</artifactId> 7 <version>0.3.0-SNAPSHOT</version> 8 <packaging>jar</packaging> 9 <name>OAIHarvester</name> 10 10 11 <repositories> 12 <repository> 13 <id>repository.jboss-public</id> 14 <name>JBoss Repository</name> 15 <url>https://repository.jboss.org/nexus/content/groups/public/</url> 16 </repository> 11 <properties> 12 <!-- project settings --> 13 <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> 14 <!-- plug-in settings --> 15 <maven.compiler.source>1.6</maven.compiler.source> 16 <maven.compiler.target>1.6</maven.compiler.target> 17 <!-- versions of common dependencies --> 18 <slf4j.version>1.7.7</slf4j.version> 19 </properties> 17 20 18 < repository>19 <id>repository.java-net</id>20 <name>Java.net Repository</name>21 <url>http://download.java.net/maven/2/</url>22 </repository>23 </repositories>21 <dependencies> 22 <dependency> 23 <groupId>org.slf4j</groupId> 24 <artifactId>slf4j-api</artifactId> 25 <version>${slf4j.version}</version> 26 </dependency> 24 27 25 <build> 26 <resources> 27 <resource> 28 <directory>src/main/resources</directory> 29 </resource> 30 </resources> 28 <dependency> 29 <groupId>org.apache.httpcomponents</groupId> 30 <artifactId>httpclient</artifactId> 31 <version>4.3.4</version> 32 </dependency> 31 33 32 <plugins> 33 <plugin> 34 <groupId>org.apache.maven.plugins</groupId> 35 <artifactId>maven-compiler-plugin</artifactId> 36 <version>2.3.2</version> 37 <configuration> 38 <source>${maven.compiler.source}</source> 39 
<target>${maven.compiler.target}</target> 40 </configuration> 41 </plugin> 42 </plugins> 43 </build> 34 <dependency> 35 <groupId>org.codehaus.woodstox</groupId> 36 <artifactId>woodstox-core-lgpl</artifactId> 37 <version>4.4.0</version> 38 </dependency> 44 39 45 <dependencies> 46 <dependency> 47 <groupId>org.slf4j</groupId> 48 <artifactId>slf4j-api</artifactId> 49 <version>${slf4j.version}</version> 50 </dependency> 40 <!-- 41 <dependency> 42 <groupId>net.java.dev.msv</groupId> 43 <artifactId>msv-core</artifactId> 44 <version>2010.2</version> 45 <scope>runtime</scope> 46 </dependency> 47 --> 51 48 52 <dependency> 53 <groupId>commons-lang</groupId> 54 <artifactId>commons-lang</artifactId> 55 <version>2.6</version> 56 <type>jar</type> 57 <scope>compile</scope> 58 </dependency> 49 <dependency> 50 <groupId>joda-time</groupId> 51 <artifactId>joda-time</artifactId> 52 <version>2.3</version> 53 </dependency> 59 54 60 <dependency> 61 <groupId>org.apache.httpcomponents</groupId> 62 <artifactId>httpclient</artifactId> 63 <version>4.2.5</version> 64 </dependency> 55 <dependency> 56 <groupId>org.slf4j</groupId> 57 <artifactId>slf4j-log4j12</artifactId> 58 <version>${slf4j.version}</version> 59 <scope>test</scope> 60 </dependency> 61 </dependencies> 65 62 66 <dependency> 67 <groupId>org.codehaus.woodstox</groupId> 68 <artifactId>woodstox-core-lgpl</artifactId> 69 <version>4.2.0</version> 70 </dependency> 71 72 <dependency> 73 <groupId>net.java.dev.msv</groupId> 74 <artifactId>msv-core</artifactId> 75 <version>2010.2</version> 76 <scope>runtime</scope> 77 </dependency> 78 79 <dependency> 80 <groupId>joda-time</groupId> 81 <artifactId>joda-time</artifactId> 82 <version>2.0</version> 83 </dependency> 84 85 <dependency> 86 <groupId>org.slf4j</groupId> 87 <artifactId>slf4j-log4j12</artifactId> 88 <version>${slf4j.version}</version> 89 <scope>test</scope> 90 </dependency> 91 </dependencies> 92 93 <properties> 94 <!-- project settings --> 95 
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> 96 <!-- plug-in settings --> 97 <maven.compiler.source>1.6</maven.compiler.source> 98 <maven.compiler.target>1.6</maven.compiler.target> 99 <!-- versions of common dependencies --> 100 <slf4j.version>1.7.2</slf4j.version> 101 </properties> 63 <build> 64 <plugins> 65 <plugin> 66 <groupId>org.apache.maven.plugins</groupId> 67 <artifactId>maven-compiler-plugin</artifactId> 68 <version>2.3.2</version> 69 <configuration> 70 <source>${maven.compiler.source}</source> 71 <target>${maven.compiler.target}</target> 72 </configuration> 73 </plugin> 74 </plugins> 75 </build> 102 76 103 77 </project> -
OAIHarvester/trunk/OAIHarvester/src/main/java/eu/clarin/cmdi/oai/harvester/impl/HarvestWorker.java
r3931 r5458 13 13 import org.apache.http.HttpResponse; 14 14 import org.apache.http.HttpStatus; 15 import org.apache.http.client.HttpClient; 15 import org.apache.http.client.config.CookieSpecs; 16 import org.apache.http.client.config.RequestConfig; 17 import org.apache.http.client.methods.CloseableHttpResponse; 16 18 import org.apache.http.client.methods.HttpGet; 17 import org.apache.http.client.utils.HttpClientUtils; 18 import org.apache.http.impl.client.DefaultHttpClient; 19 import org.apache.http.impl.cookie.DateParseException; 20 import org.apache.http.impl.cookie.DateUtils; 21 import org.apache.http.params.CoreConnectionPNames; 22 import org.apache.http.params.CoreProtocolPNames; 23 import org.apache.http.params.HttpParams; 24 import org.apache.http.util.EntityUtils; 19 import org.apache.http.client.utils.DateUtils; 20 import org.apache.http.config.SocketConfig; 21 import org.apache.http.impl.NoConnectionReuseStrategy; 22 import org.apache.http.impl.client.CloseableHttpClient; 23 import org.apache.http.impl.client.HttpClients; 24 import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; 25 import org.apache.http.protocol.BasicHttpContext; 25 26 import org.joda.time.DateTime; 26 27 import org.joda.time.DateTimeZone; … … 45 46 46 47 public class HarvestWorker { 48 private static final String USER_AGENT = "OAI-Harvester/1.0.0"; 47 49 private static final String OAI_NS = "http://www.openarchives.org/OAI/2.0/"; 48 50 private static final String ARG_METADATA_PREFIX = "metadataPrefix"; … … 68 70 LoggerFactory.getLogger(HarvestWorker.class); 69 71 private final AbstractHarvester harvester; 70 private final HttpClient client;72 private final CloseableHttpClient client; 71 73 private final RequestUriBuilder uriBuilder; 72 74 private final HeaderImpl header; … … 77 79 HarvestWorker(AbstractHarvester harvester) { 78 80 this.harvester = harvester; 79 // http client 80 client = new DefaultHttpClient(); 81 
client.getParams().setParameter(CoreProtocolPNames.USER_AGENT, 82 "eu.clarin.cmdi.oai.Harvester/0.0.1"); 81 82 // create HTTP client 83 // FIXME: get timeout values from somewhere? 84 final int connectTimeout = 30 * 1000; 85 final int socketTimeout = 180 * 1000; 86 client = createHttpClient(connectTimeout, socketTimeout); 87 83 88 // request stuff 84 89 uriBuilder = new RequestUriBuilder(); … … 490 495 } 491 496 request.addHeader(HEADER_CONNECTION, "close"); 492 // FIXME: get timeout values from somewhere? 493 HttpParams params = request.getParams(); 494 params.setParameter(CoreConnectionPNames.SO_TIMEOUT, 180000); 495 params.setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 30000); 496 497 final HttpResponse response = client.execute(request); 497 498 498 /* 499 499 * FIXME: the following code need serious re-factoring 500 500 */ 501 boolean close = false; 501 CloseableHttpResponse response = null; 502 boolean close = true; 502 503 try { 504 response = client.execute(request, new BasicHttpContext()); 503 505 job.addToNetworkTime(System.currentTimeMillis() - now); 506 504 507 final int status = response.getStatusLine().getStatusCode(); 505 508 if (status == HttpStatus.SC_OK) { 506 509 delay = 0; 510 close = false; 507 511 return new Response(job, response, harvester); 508 512 } else { … … 511 515 logger.debug("got service unavailable status, retrying " + 512 516 "after {} seconds", delay); 513 close = true;514 517 return null; 515 518 } else { … … 520 523 } 521 524 } catch (IOException e) { 522 close = true;523 525 throw e; 524 526 } catch (XMLStreamException e) { 525 close = true;526 527 throw e; 527 528 } catch (HarvesterException e) { 528 close = true;529 529 throw e; 530 530 } finally { 531 if (close) { 532 /* 533 * try hard to release HTTP client resources ... 
534 */ 535 try { 536 EntityUtils.consume(response.getEntity()); 537 } catch (IOException ex) { 538 /* IGNORE */ 539 } 540 541 /* make sure to release allocated resources */ 542 HttpClientUtils.closeQuietly(response); 543 544 if (request != null) { 545 request.abort(); 546 } 531 if (close && (response != null)) { 532 /* make sure. response is closed ... */ 533 response.close(); 547 534 } 548 535 } … … 561 548 delay = Integer.parseInt(v) * 1000; 562 549 } catch (NumberFormatException e) { 563 try {564 550 Date date = DateUtils.parseDate(v); 565 delay = date.getTime() - System.currentTimeMillis(); 566 } catch (DateParseException f) { 567 /* IGNORE */ 568 } 551 if (date != null) { 552 delay = date.getTime() - System.currentTimeMillis(); 553 } 569 554 } 570 555 } … … 615 600 } 616 601 602 603 private static CloseableHttpClient createHttpClient(int connectTimeout, 604 int socketTimeout) { 605 final PoolingHttpClientConnectionManager manager = 606 new PoolingHttpClientConnectionManager(); 607 manager.setDefaultMaxPerRoute(8); 608 manager.setMaxTotal(128); 609 610 final SocketConfig socketConfig = SocketConfig.custom() 611 .setSoReuseAddress(true) 612 .setSoLinger(0) 613 .build(); 614 615 final RequestConfig requestConfig = RequestConfig.custom() 616 .setAuthenticationEnabled(false) 617 .setRedirectsEnabled(true) 618 .setMaxRedirects(4) 619 .setCircularRedirectsAllowed(false) 620 .setCookieSpec(CookieSpecs.IGNORE_COOKIES) 621 .setConnectTimeout(connectTimeout) 622 .setSocketTimeout(socketTimeout) 623 .setConnectionRequestTimeout(0) /* infinite */ 624 .setStaleConnectionCheckEnabled(false) 625 .build(); 626 627 return HttpClients.custom() 628 .setUserAgent(USER_AGENT) 629 .setConnectionManager(manager) 630 .setDefaultSocketConfig(socketConfig) 631 .setDefaultRequestConfig(requestConfig) 632 .setConnectionReuseStrategy(new NoConnectionReuseStrategy()) 633 .build(); 634 } 635 617 636 } // class HarvestWorker -
OAIHarvester/trunk/OAIHarvester/src/main/java/eu/clarin/cmdi/oai/harvester/impl/Response.java
r5367 r5458 19 19 20 20 import org.apache.http.HttpEntity; 21 import org.apache.http.HttpResponse; 22 import org.apache.http.client.utils.HttpClientUtils; 23 import org.apache.http.util.EntityUtils; 21 import org.apache.http.client.methods.CloseableHttpResponse; 24 22 import org.codehaus.stax2.XMLStreamReader2; 25 23 import org.slf4j.Logger; … … 84 82 private static final Logger logger = 85 83 LoggerFactory.getLogger(Response.class); 86 private final HttpResponse response;84 private final CloseableHttpResponse response; 87 85 private final long now = System.currentTimeMillis(); 88 86 private final CountingInputStream stream; … … 90 88 91 89 92 Response(HarvestJobImpl job, HttpResponse response, AbstractHarvester harvester)90 Response(HarvestJobImpl job, CloseableHttpResponse response, AbstractHarvester harvester) 93 91 throws IOException, XMLStreamException, HarvesterException { 94 92 if (job == null) { … … 165 163 } 166 164 167 /* 168 * try hard to release HTTP client resources ... 169 */ 165 /* make sure to release allocated resources */ 170 166 try { 171 EntityUtils.consume(response.getEntity());172 } catch (IOException e x) {167 response.close(); 168 } catch (IOException e) { 173 169 /* IGNORE */ 174 170 } 175 176 /* make sure to release allocated resources */177 HttpClientUtils.closeQuietly(response);178 171 } 179 172 … … 382 375 while (reader.isCharacters()) { 383 376 String s = reader.getText(); 384 if ( s != null) {377 if ((s != null) && !s.isEmpty()) { 385 378 sb.append(s); 386 379 }
Note: See TracChangeset for help on using the changeset viewer.