Changeset 2826
- Timestamp: 04/22/13 13:32:04 (11 years ago)
- Location: vlo/trunk/vlo_importer/src
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
-
vlo/trunk/vlo_importer/src/main/java/eu/clarin/cmdi/vlo/config/VloConfig.java
r2810 r2826 162 162 163 163 @Element 164 private static int maxOnHeap = 1000; 164 private static int maxOnHeap = 0; 165 166 @Element 167 private static boolean useMaxFileSize = false; 168 169 @Element 170 private static int maxFileSize = 0; 165 171 166 172 @Element … … 278 284 279 285 /** 286 * Get the value of the useMaxFileSize parameter<br><br> 287 * 288 * For a description of the parameter, refer to the general VLO 289 * documentation. 290 * 291 * @return the value 292 */ 293 public static boolean getUseMaxFileSize (){ 294 return useMaxFileSize; 295 } 296 297 /** 298 * Set the value of the useMaxFileSize parameter<br><br> 299 * 300 * For a description of the parameter, refer to the general VLO 301 * documentation. 302 * 303 * @param param the value 304 */ 305 public static void setUseMaxFileSize (boolean param){ 306 useMaxFileSize = param; 307 } 308 309 /** 310 * Get the value of the maxFileSize parameter<br><br> 311 * 312 * For a description of the parameter, refer to the general VLO 313 * documentation. 314 * 315 * @return the value 316 */ 317 public static int getMaxFileSize (){ 318 return maxFileSize; 319 } 320 321 /** 322 * Set the value of the maxFileSize parameter<br><br> 323 * 324 * For a description of the parameter, refer to the general VLO 325 * documentation. 326 * 327 * @param param the value 328 */ 329 public static void setMaxFileSize (int param){ 330 maxFileSize = param; 331 } 332 333 /** 280 334 * Get the value of the useHandleResolver parameter<br><br> 281 335 * -
vlo/trunk/vlo_importer/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java
r2810 r2826 51 51 * Log log log log 52 52 */ 53 private final static Logger LOG = LoggerFactory.getLogger(MetadataImporter.class); 53 protected final static Logger LOG = LoggerFactory.getLogger(MetadataImporter.class); 54 54 /** 55 55 * Some place to store errors. … … 88 88 * Just to know what we have already done. 89 89 */ 90 private final Set<String> processedIds = new HashSet<String>(); 90 protected final Set<String> processedIds = new HashSet<String>(); 91 91 /** 92 92 * Some caching for solr documents (we are more efficient if we ram a whole … … 96 96 97 97 // SOME STATS 98 private int nrOFDocumentsUpdated; 99 private int nrOfFilesAnalyzed = 0; 100 private int nrOfFilesWithoutId = 0; 101 private int nrOfFilesWithoutDataResources = 0; 102 private int nrOfFilesWithError = 0; 98 protected int nrOFDocumentsUpdated; 99 protected int nrOfFilesAnalyzed = 0; 100 protected int nrOfFilesWithoutId = 0; 101 protected int nrOfFilesWithoutDataResources = 0; 102 protected int nrOfFilesWithError = 0; 103 103 104 104 /** … … 131 131 List<File> files = getFilesFromDataRoot(dataRoot.getRootFile()); 132 132 for (File file : files) { 133 LOG.debug("PROCESSING FILE: " + file.getAbsolutePath()); 134 processCmdi(file, dataRoot, processor); 133 if (VloConfig.getUseMaxFileSize() && 134 file.length() > VloConfig.getMaxFileSize()) { 135 LOG.info("Skipping " + file.getAbsolutePath() + " because it is too large."); 136 } else { 137 LOG.debug("PROCESSING FILE: " + file.getAbsolutePath()); 138 processCmdi(file, dataRoot, processor); 139 } 135 140 } 136 141 if (!docs.isEmpty()) { … … 170 175 * @return 171 176 */ 172 private List<DataRoot> checkDataRoots() { 177 protected List<DataRoot> checkDataRoots() { 173 178 List<DataRoot> dataRoots = VloConfig.getDataRoots(); 174 179 for (DataRoot dataRoot : dataRoots) { … … 189 194 * directory 190 195 */ 191 private List<File> getFilesFromDataRoot(File rootFile) { 196 protected List<File> getFilesFromDataRoot(File rootFile) { 192 197 List<File> result = new 
ArrayList<File>(); 193 198 if (rootFile.isFile()) { … … 226 231 * @throws IOException 227 232 */ 228 private void processCmdi(File file, DataRoot dataOrigin, CMDIDataProcessor processor) throws SolrServerException, IOException { 233 protected void processCmdi(File file, DataRoot dataOrigin, CMDIDataProcessor processor) throws SolrServerException, IOException { 229 234 nrOfFilesAnalyzed++; 230 235 CMDIData cmdiData = null; … … 262 267 * @return true if id is acceptable, false otherwise 263 268 */ 264 private boolean idOk(String id) { 269 protected boolean idOk(String id) { 265 270 return id != null && !id.isEmpty(); 266 271 } … … 278 283 * @throws IOException 279 284 */ 280 private void updateDocument(SolrInputDocument solrDocument, CMDIData cmdiData, File file, DataRoot dataOrigin) throws SolrServerException, 285 protected void updateDocument(SolrInputDocument solrDocument, CMDIData cmdiData, File file, DataRoot dataOrigin) throws SolrServerException, 281 286 IOException { 282 287 if (!solrDocument.containsKey(FacetConstants.FIELD_COLLECTION)) { … … 318 323 * a type is overwritten and already in the solrDocument we take that type. 319 324 */ 320 private void addResourceData(SolrInputDocument solrDocument, CMDIData cmdiData) { 325 protected void addResourceData(SolrInputDocument solrDocument, CMDIData cmdiData) { 321 326 List<Object> fieldValues = solrDocument.containsKey(FacetConstants.FIELD_RESOURCE_TYPE) ? new ArrayList<Object>(solrDocument 322 327 .getFieldValues(FacetConstants.FIELD_RESOURCE_TYPE)) : null; -
vlo/trunk/vlo_importer/src/main/resources/VloConfig.xml
r2814 r2826 4 4 5 5 <maxOnHeap>128</maxOnHeap> 6 7 <useMaxFileSize>false</useMaxFileSize> 8 9 <maxFileSize>20000000</maxFileSize> 6 10 7 11 <useHandleResolver>false</useHandleResolver> -
vlo/trunk/vlo_importer/src/test/java/eu/clarin/cmdi/vlo/config/VloConfigTest.java
r2816 r2826 88 88 89 89 /** 90 * Test the getVloHomeLink method 90 * Test the getMaxOnHeap method 91 91 */ 92 92 @Test … … 102 102 103 103 /** 104 * Test the setVloHomeLink method 104 * Test the setMaxOnHeap method 105 105 */ 106 106 @Test … … 119 119 120 120 /** 121 * Test the getUseMaxFileSize method 122 */ 123 @Test 124 public void testGetUseMaxFileSize() { 125 126 System.out.println("getUseMaxFileSize"); 127 128 boolean expResult = false; 129 boolean result = VloConfig.getUseMaxFileSize(); 130 131 assertEquals(expResult, result); 132 } 133 134 /** 135 * Test the setSetUseMaxFileSize method 136 */ 137 @Test 138 public void testSetUseMaxFileSize() { 139 140 System.out.println("setUseMaxFileSize"); 141 142 boolean param = true; 143 144 VloConfig.setUseMaxFileSize(param); 145 146 boolean result = VloConfig.getUseMaxFileSize(); 147 148 assertEquals(param, result); 149 } 150 151 /** 152 * Test the getMaxFileSize method 153 */ 154 @Test 155 public void testGetMaxFileSize() { 156 157 System.out.println("getMaxFileSize"); 158 159 int expResult = 20000000; 160 int result = VloConfig.getMaxFileSize(); 161 162 assertEquals(expResult, result); 163 } 164 165 /** 166 * Test the setMaxFileSize method 167 */ 168 @Test 169 public void testSetMaxFileSize() { 170 171 System.out.println("setMaxFileSize"); 172 173 int param = 10000000; 174 175 VloConfig.setMaxFileSize(param); 176 177 int result = VloConfig.getMaxFileSize(); 178 179 assertEquals(param, result); 180 } 181 182 /** 121 183 * Test the getHandleResolver method 122 184 */ … … 124 186 public void testGetUseHandleResolver() { 125 187 126 System.out.println("getMaxOnHeap"); 188 System.out.println("getUseHandleResolver"); 127 189 128 190 boolean expResult = false; … … 138 200 public void testSetUseHandleResolver() { 139 201 140 System.out.println("setMaxOnHeap"); 202 System.out.println("setUseHandleResolver"); 141 203 142 204 boolean param = true; -
vlo/trunk/vlo_importer/src/test/java/eu/clarin/cmdi/vlo/importer/MetadataImporterTest.java
r2816 r2826 11 11 import java.util.Collections; 12 12 import java.util.List; 13 import java.util.logging.Level; 14 import java.util.logging.Logger; 13 15 import org.apache.solr.client.solrj.SolrServerException; 14 16 import org.apache.solr.common.SolrInputDocument; … … 236 238 final List<SolrInputDocument> result = new ArrayList<SolrInputDocument>(); 237 239 238 // read configuration in ImporterTestCase.setup; change it now 240 /* 241 * Read configuration in ImporterTestCase.setup and change the setup to 242 * suit the test. 243 */ 239 244 240 245 modifyConfig(rootFile); … … 242 247 MetadataImporter importer; 243 248 importer = new MetadataImporter() { 249 /* 250 * Because in the test, the solr server is not assumed to be 251 * available, override the importer's class startImport method by 252 * leaving out interaction with server. 253 * 254 * By invoking the processCmdi method, the class being defined here 255 * needs to anticipate on an exception possibly thrown by the 256 * processCmdi method invoking the sendDocs method. Please note 257 * however, that the latter method is overriden, and the actual 258 * database is being replaced by an array of documents. 
259 */ 244 260 @Override 245 protected void initSolrServer() throws MalformedURLException { 246 //do nothing no solrserver in test 261 void startImport() throws MalformedURLException { 262 263 List<DataRoot> dataRoots = checkDataRoots(); 264 long start = System.currentTimeMillis(); 265 try { 266 267 for (DataRoot dataRoot : dataRoots) { 268 LOG.info("Start of processing: " + 269 dataRoot.getOriginName()); 270 CMDIDataProcessor processor = new 271 CMDIParserVTDXML(POST_PROCESSORS); 272 List<File> files = 273 getFilesFromDataRoot(dataRoot.getRootFile()); 274 for (File file : files) { 275 if (VloConfig.getUseMaxFileSize() 276 && file.length() > 277 VloConfig.getMaxFileSize()) { 278 LOG.info("Skipping " + file.getAbsolutePath() + 279 " because it is too large."); 280 } else { 281 LOG.debug("PROCESSING FILE: " + 282 file.getAbsolutePath()); 283 /* 284 * Anticipate on the solr exception that will 285 * never by raised because sendDocs is overriden 286 * in a suitable way. 287 */ 288 try { 289 processCmdi(file, dataRoot, processor); 290 } catch (SolrServerException ex) { 291 Logger.getLogger(MetadataImporterTest.class.getName()).log(Level.SEVERE, null, ex); 292 } 293 } 294 } 295 if (!docs.isEmpty()) { 296 sendDocs(); 297 } 298 LOG.info("End of processing: " + 299 dataRoot.getOriginName()); 300 } 301 302 } catch (IOException e) { 303 LOG.error("error updating files:\n", e); 304 } finally { 305 306 } 307 long took = (System.currentTimeMillis() - start) / 1000; 308 LOG.info("Found " + nrOfFilesWithoutId + 309 " file(s) without an id. (id is generated based on fileName but that may not be unique)"); 310 LOG.info("Found " + nrOfFilesWithError + 311 " file(s) with errors."); 312 LOG.info("Found " + nrOfFilesWithoutDataResources 313 + " file(s) without data resources (metadata descriptions without resources are ignored)."); 314 LOG.info("Update of " + nrOFDocumentsUpdated + " took " + took + 315 " secs. 
Total nr of files analyzed " + nrOfFilesAnalyzed); 247 316 } 248 317 318 /* 319 * Replace the server's database by a document array 320 */ 249 321 @Override 250 protected void sendDocs() throws SolrServerException,IOException {251 //overriding here so we can test the docs322 protected void sendDocs() throws IOException { 323 252 324 result.addAll(this.docs); 253 325 docs = new ArrayList<SolrInputDocument>(); … … 266 338 dataRoot.setPrefix("http://example.com"); 267 339 VloConfig.setDataRoots(Collections.singletonList(dataRoot)); 268 269 /**270 * Please observe that if the deleteAllFirst parameter is true, the271 * startImport method in the MetaDataImporter class will refer to an272 * instance of the solr server. Because in this test initSolrServer273 * method is empty, no solr server is create. Therefore, the value of274 * the deleteAllFirst parameter needs to be false.275 */276 VloConfig.setDeleteAllFirst(false);277 340 } 278 341
Note: See TracChangeset for help on using the changeset viewer.