Changeset 1163 for OAIHarvester
- Timestamp:
- 03/11/11 09:52:27 (13 years ago)
- Location:
- OAIHarvester/trunk/OAIHarvester/src
- Files:
-
- 7 added
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
OAIHarvester/trunk/OAIHarvester/src/main/java/eu/clarin/cmdi/oai/harvester/Harvester.java
r1148 r1163 3 3 import java.net.URI; 4 4 5 import eu.clarin.cmdi.oai.harvester.ext.DescriptionParser; 6 5 7 public interface Harvester { 8 9 public void registerDescriptionParser(DescriptionParser parser) 10 throws HarvesterException; 6 11 7 12 public HarvestJob createJob(URI repositoryURI, HarvestHandler handler) -
OAIHarvester/trunk/OAIHarvester/src/main/java/eu/clarin/cmdi/oai/harvester/Repository.java
r1128 r1163 1 1 package eu.clarin.cmdi.oai.harvester; 2 import java.util.Date; 2 3 import java.util.List; 3 4 … … 25 26 public List<String> getAdminEmail(); 26 27 27 public String getEarliestTimestamp(); 28 public Date getEarliestTimestamp(); 28 29 29 30 public DeletedNotion getDeletedNotion(); … … 33 34 public int getCompressionMask(); 34 35 36 public List<Description> getDescriptions(); 37 35 38 } // interface Repository -
OAIHarvester/trunk/OAIHarvester/src/main/java/eu/clarin/cmdi/oai/harvester/impl/AbstractHarvester.java
r1149 r1163 4 4 import java.net.URI; 5 5 import java.net.URISyntaxException; 6 import java.util.ArrayList; 7 import java.util.List; 6 8 7 9 import javax.xml.stream.XMLInputFactory; … … 16 18 import eu.clarin.cmdi.oai.harvester.Harvester; 17 19 import eu.clarin.cmdi.oai.harvester.HarvesterException; 20 import eu.clarin.cmdi.oai.harvester.ext.DescriptionParser; 18 21 19 22 abstract class AbstractHarvester implements Harvester { … … 22 25 private final XMLInputFactory factory; 23 26 // private final XMLValidationSchema schema; 27 private List<DescriptionParser> descriptionParsers = 28 new ArrayList<DescriptionParser>(); 24 29 25 30 protected AbstractHarvester() { … … 41 46 } 42 47 48 public final void registerDescriptionParser(DescriptionParser parser) 49 throws HarvesterException { 50 if (parser == null) { 51 throw new NullPointerException("parser == null"); 52 } 53 if (findDescriptionParser(parser.getNamespaceURI(), 54 parser.getLocalName()) != null) { 55 throw new HarvesterException("description parser for '{" + 56 parser.getNamespaceURI() + "}" + parser.getLocalName() + 57 "' was already registered"); 58 } 59 if (descriptionParsers == null) { 60 descriptionParsers = new ArrayList<DescriptionParser>(); 61 } 62 descriptionParsers.add(parser); 63 } 64 43 65 public abstract HarvestJob createJob(URI repositoryURI, 44 66 HarvestHandler handler) throws HarvesterException; … … 94 116 } 95 117 118 DescriptionParser findDescriptionParser(String namespaceURI, 119 String localName) { 120 if (descriptionParsers != null) { 121 for (DescriptionParser parser : descriptionParsers) { 122 if (namespaceURI.equals(parser.getNamespaceURI()) && 123 localName.equals(parser.getLocalName())) { 124 return parser; 125 } 126 } 127 } 128 return null; 129 } 130 96 131 abstract void doRunJob(HarvestJobImpl job) throws HarvesterException; 97 132 -
OAIHarvester/trunk/OAIHarvester/src/main/java/eu/clarin/cmdi/oai/harvester/impl/HarvestJobImpl.java
r1145 r1163 13 13 import javax.xml.stream.XMLStreamReader; 14 14 15 import eu.clarin.cmdi.oai.harvester.Description; 15 16 import eu.clarin.cmdi.oai.harvester.HarvestHandler; 16 17 import eu.clarin.cmdi.oai.harvester.HarvestJob; … … 109 110 private String protocolVersion; 110 111 private List<String> adminEmail = Collections.emptyList(); 111 private String earliestTimestamp; 112 private Date earliestTimestamp; 112 113 private DeletedNotion deletedNotion; 113 114 private Granularity granularity; 114 115 private int compressionMask; 116 private List<Description> descriptions; 115 117 private List<MetadataFormat> metadataFormats; 116 118 private long totalRecordCount = 0; … … 277 279 return Collections.unmodifiableList(statistics); 278 280 } else { 279 return null; 281 return Collections.emptyList(); 280 282 } 281 283 } … … 355 357 } 356 358 357 void setEarliestTimestamp(String earliestTimestamp) { 359 void setEarliestTimestamp(Date earliestTimestamp) { 358 360 this.earliestTimestamp = earliestTimestamp; 359 361 } … … 377 379 int getCompressionMask() { 378 380 return compressionMask; 381 } 382 383 void setDescriptions(List<Description> descriptions) { 384 this.descriptions = descriptions; 379 385 } 380 386 … … 539 545 540 546 @Override 541 public String getEarliestTimestamp() { 547 public Date getEarliestTimestamp() { 542 548 return earliestTimestamp; 543 549 } … … 560 566 @Override 561 567 public List<String> getAdminEmail() { 562 return adminEmail; 568 return (adminEmail != null) ? 569 Collections.unmodifiableList(adminEmail) : null; 570 } 571 572 @Override 573 public List<Description> getDescriptions() { 574 return (descriptions != null) ? 575 Collections.unmodifiableList(descriptions) : null; 563 576 } 564 577 }; … … 570 583 this.metadataFormats = metadataFormats; 571 584 if (handler != null) { 572 handler.onListMetadataFormats(this.metadataFormats); 585 handler.onListMetadataFormats( 586 Collections.unmodifiableList(this.metadataFormats)); 573 587 } 574 588 } -
OAIHarvester/trunk/OAIHarvester/src/main/java/eu/clarin/cmdi/oai/harvester/impl/HarvestWorker.java
r1150 r1163 25 25 import org.joda.time.format.DateTimeFormatter; 26 26 27 import eu.clarin.cmdi.oai.harvester.Description; 27 28 import eu.clarin.cmdi.oai.harvester.HarvestJob.State; 28 29 import eu.clarin.cmdi.oai.harvester.HarvesterException; … … 31 32 import eu.clarin.cmdi.oai.harvester.Repository.DeletedNotion; 32 33 import eu.clarin.cmdi.oai.harvester.Repository.Granularity; 34 import eu.clarin.cmdi.oai.harvester.ext.DescriptionParser; 33 35 import eu.clarin.cmdi.oai.harvester.impl.HarvestJobImpl.Task; 34 36 … … 187 189 job.setAdminEmail(adminEmail); 188 190 189 s = response.readContent(OAI_NS, "earliestDatestamp", true); 190 job.setEarliestTimestamp(s); 191 /* 192 * defer setting of earliestDatestamp in job until we know 193 * the datestamp granularity of the repository 194 */ 195 final String earliestDatestamp = 196 response.readContent(OAI_NS, "earliestDatestamp", true); 191 197 192 198 s = response.readContent(OAI_NS, "deletedRecord", true); … … 203 209 204 210 s = response.readContent(OAI_NS, "granularity", true); 211 Granularity granularity = null; 205 212 if (GRANULARITY_DAYS.equals(s)) { 206 job.setGranularity(Granularity.DAYS); 213 granularity = Granularity.DAYS; 207 214 } else if (GRANULARITY_SECONDS.equals(s)) { 208 job.setGranularity(Granularity.SECONDS); 215 granularity = Granularity.SECONDS; 209 216 } else { 210 217 throw new HarvesterException( 211 218 "invalid value for 'granularity': " + s); 212 219 } 220 job.setGranularity(granularity); 221 job.setEarliestTimestamp(parseDate(granularity, earliestDatestamp)); 213 222 214 223 int mask = 0; … … 225 234 job.setCompressionMask(mask); 226 235 236 List<Description> descriptions = null; 227 237 while (response.readStart(OAI_NS, "description", false)) { 228 238 response.consumeWhitespace(); 229 239 final String namespaceURI = response.readNamespaceURI(); 230 System.err.println("DESCRIPTION: URI = " + namespaceURI); 240 final String localName = response.peekElementLocalName(); 241 DescriptionParser 
parser = harvester.findDescriptionParser( 242 namespaceURI, localName); 243 if (parser != null) { 244 Description desc = 245 parser.parseDescription(response.getXMLStreamReader()); 246 if (desc != null) { 247 if (descriptions == null) { 248 descriptions = new ArrayList<Description>(); 249 } 250 descriptions.add(desc); 251 } 252 } else { 253 System.err.println("skipping DESC " + namespaceURI + ", " + 254 localName); 255 } 231 256 response.readEnd(OAI_NS, "description", true); 257 job.setDescriptions(descriptions); 232 258 } 233 259 response.readEnd(OAI_NS, "Identify"); -
OAIHarvester/trunk/OAIHarvester/src/main/java/eu/clarin/cmdi/oai/harvester/impl/Response.java
r1137 r1163 286 286 } 287 287 return reader.getNamespaceURI(); 288 } 289 290 public String peekElementLocalName() throws XMLStreamException { 291 if (!reader.isStartElement()) { 292 throw new XMLStreamException("not at a start elment event", 293 reader.getLocation()); 294 } 295 return reader.getLocalName(); 288 296 } 289 297 -
OAIHarvester/trunk/OAIHarvester/src/test/java/eu/clarin/cmdi/oai/harvester/HarvesterTest.java
r1147 r1163 89 89 } 90 90 System.err.println(" compression: " + sb.toString()); 91 final List<Description> descs = repository.getDescriptions(); 92 if (descs != null) { 93 for (Description desc : descs) { 94 System.err.println(" description: " + desc); 95 } 96 } 91 97 } 92 98 … … 189 195 190 196 Harvester harvester = SimpleHarvester.newInstance(); 191 harvester.setMaxNetworkRetryCount(16); 192 197 try { 198 // configure harvester 199 harvester.setMaxNetworkRetryCount(16); 200 harvester.registerDescriptionParser( 201 new OAIIdentifierDescriptionParser()); 202 harvester.registerDescriptionParser( 203 new OAIDublinCoreDescriptionParser()); 204 193 205 HarvestJob job = harvester.createJob(repos, handler); 194 206 // Calendar cal = Calendar.getInstance(TimeZone.getDefault());
Note: See TracChangeset
for help on using the changeset viewer.