Changeset 1906 for OAIHarvester
- Timestamp: 04/24/12 18:29:12 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
OAIHarvester/trunk/OAIHarvester/src/test/java/eu/clarin/cmdi/oai/harvester/HarvesterTest.java
r1173 r1906 8 8 import java.util.Date; 9 9 import java.util.List; 10 import java.util.Properties; 10 11 import java.util.zip.ZipEntry; 11 12 import java.util.zip.ZipException; … … 17 18 import javax.xml.stream.XMLStreamWriter; 18 19 20 import org.joda.time.DateTime; 21 import org.joda.time.format.DateTimeFormatter; 22 import org.joda.time.format.ISODateTimeFormat; 23 19 24 import eu.clarin.cmdi.oai.harvester.impl.SimpleHarvester; 20 25 import eu.clarin.cmdi.oai.harvester.util.XMLStreamCopier; … … 24 29 private static XMLOutputFactory factory = 25 30 XMLOutputFactory.newInstance(); 31 private static final DateTimeFormatter fmt = 32 ISODateTimeFormat.basicDateTimeNoMillis().withZoneUTC(); 26 33 27 34 private static class MyHarvestHandler extends HarvestHandlerAdapter { 28 35 private ZipOutputStream output; 36 private boolean saveHeader; 29 37 private int reqNum; 30 38 private String prefix; 31 39 private byte[] buffer = new byte[8192]; 32 40 33 public MyHarvestHandler(File file ) {41 public MyHarvestHandler(File file, boolean saveHeader) { 34 42 try { 43 this.saveHeader = saveHeader; 35 44 FileOutputStream fos = new FileOutputStream(file); 36 45 output = new ZipOutputStream(fos); … … 130 139 System.err.println(" Sets: " + header.getSets()); 131 140 } 132 String filename = header.getIdentifier();133 filename = replaceBadChars(filename) + ".xml";134 if (prefix != null) {135 filename = replaceBadChars(prefix) + "/" + filename;136 }137 141 try { 138 ZipEntry entry = new ZipEntry(filename); 139 entry.setTime(header.getDatestamp().getTime()); 140 output.putNextEntry(entry); 141 XMLStreamWriter writer = 142 factory.createXMLStreamWriter(output); 143 XMLStreamCopier.copy(reader, writer); 144 writer.flush(); 145 writer.close(); 146 output.closeEntry(); 147 output.flush(); 148 reader.close(); 142 if (!header.isDeleted()) { 143 String filename = makeFileName(header.getIdentifier(), 144 prefix, "xml"); 145 ZipEntry entry = new ZipEntry(filename); 146 
entry.setTime(header.getDatestamp().getTime()); 147 output.putNextEntry(entry); 148 XMLStreamWriter writer = 149 factory.createXMLStreamWriter(output); 150 XMLStreamCopier.copy(reader, writer); 151 writer.flush(); 152 writer.close(); 153 output.closeEntry(); 154 output.flush(); 155 reader.close(); 156 } 157 158 if (saveHeader) { 159 String filename = makeFileName(header.getIdentifier(), 160 prefix, "oai"); 161 ZipEntry entry = new ZipEntry(filename); 162 entry.setTime(header.getDatestamp().getTime()); 163 output.putNextEntry(entry); 164 Properties props = new Properties(); 165 props.put("datestamp", 166 fmt.print(new DateTime(header.getDatestamp()))); 167 if (header.isDeleted()) { 168 props.put("isDeleted", "true"); 169 } 170 if (header.getSets() != null) { 171 StringBuilder sb = new StringBuilder(); 172 for (String set : header.getSets()) { 173 if (sb.length() > 0) { 174 sb.append(", "); 175 } 176 sb.append(set); 177 } 178 props.put("sets", sb.toString()); 179 } 180 props.store(output, null); 181 output.closeEntry(); 182 output.flush(); 183 } 149 184 } catch (Exception e) { 150 185 e.printStackTrace(); … … 180 215 } 181 216 182 private String replaceBadChars(String s) { 183 return s.replace(':', '_').replace('.', '_').replace('\\', '_') 184 .replace('/', '_'); 217 private static String makeFileName(String identifier, String prefix, 218 String extension) { 219 StringBuilder sb = new StringBuilder(); 220 if (prefix != null) { 221 sanitizedAppend(sb, prefix); 222 sb.append('/'); 223 } 224 sanitizedAppend(sb, identifier); 225 sb.append('.'); 226 sb.append(extension); 227 return sb.toString(); 228 } 229 230 private static void sanitizedAppend(StringBuilder sb, String s) { 231 for (int i = 0; i < s.length(); i++ ) { 232 char ch = s.charAt(i); 233 switch (ch) { 234 case ':': 235 /* FALL-THROUGH */ 236 case '.': 237 /* FALL-THROUGH */ 238 case '/': 239 /* FALL-THROUGH */ 240 case '\\': 241 sb.append("_"); 242 break; 243 default: 244 sb.append(ch); 245 } 246 } 185 247 } 
186 248 }; 187 249 188 250 public void run(String[] args) { 189 251 if (args.length < 2) { … … 192 254 } 193 255 final String repos = args[0]; 194 MyHarvestHandler handler = new MyHarvestHandler(new File(args[1])); 256 MyHarvestHandler handler = 257 new MyHarvestHandler(new File(args[1]), false); 195 258 196 259 Harvester harvester = SimpleHarvester.newInstance();
Note: See TracChangeset for help on using the changeset viewer.