Changeset 4611


Ignore:
Timestamp:
03/03/14 11:33:20 (10 years ago)
Author:
teckart
Message:

Fix ticket #490: "Sanitise document ID's on import" -> replacing each problematic character with its decimal ASCII code enclosed in underscores (e.g. ':' becomes "_58_" and '/' becomes "_47_")

Location:
vlo/branches/vlo-3.0/vlo-importer/src
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • vlo/branches/vlo-3.0/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java

    r4509 r4611  
    7474    final static Map<String, PostProcessor> POST_PROCESSORS = new HashMap<String, PostProcessor>();
    7575    static {
     76        POST_PROCESSORS.put(FacetConstants.FIELD_ID, new IdPostProcessor());
    7677                POST_PROCESSORS.put(FacetConstants.FIELD_CONTINENT, new ContinentNamePostProcessor());
    7778        POST_PROCESSORS.put(FacetConstants.FIELD_COUNTRY, new CountryNamePostProcessor());
  • vlo/branches/vlo-3.0/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDataProcessorTest.java

    r4507 r4611  
    7171        CMDIDataProcessor processor = getDataParser();
    7272        CMDIData data = processor.process(cmdiFile);
    73         assertEquals("test-hdl:1839/00-0000-0000-0000-0001-D", data.getId());
     73        assertEquals("test-hdl_58_1839_47_00-0000-0000-0000-0001-D", data.getId());
    7474        List<Resource> resources = data.getMetadataResources();
    7575        assertEquals(3, resources.size());
     
    377377        CMDIDataProcessor processor = getDataParser();
    378378        CMDIData data = processor.process(cmdiFile);
    379         assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId());
     379        assertEquals("test-hdl_58_1839_47_00-0000-0000-0009-294C-9", data.getId());
    380380        List<Resource> resources = data.getMetadataResources();
    381381        assertEquals(0, resources.size());
     
    513513        CMDIDataProcessor processor = getDataParser();
    514514        CMDIData data = processor.process(cmdiFile);
    515         assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId());
     515        assertEquals("test-hdl_58_1839_47_00-0000-0000-0009-294C-9", data.getId());
    516516        List<Resource> resources = data.getMetadataResources();
    517517        assertEquals(0, resources.size());
     
    584584        CMDIDataProcessor processor = getDataParser();
    585585        CMDIData data = processor.process(cmdiFile);
    586         assertEquals("oai:ailla.utexas.edu:1", data.getId());
     586        assertEquals("oai_58_ailla.utexas.edu_58_1", data.getId());
    587587        List<Resource> resources = data.getMetadataResources();
    588588        assertEquals(0, resources.size());
     
    890890        CMDIDataProcessor processor = getDataParser();
    891891        CMDIData data = processor.process(cmdiFile);
    892         assertEquals("clarin.eu:lrt:433", data.getId());
     892        assertEquals("clarin.eu_58_lrt_58_433", data.getId());
    893893        List<Resource> resources = data.getMetadataResources();
    894894        assertEquals(0, resources.size());
  • vlo/branches/vlo-3.0/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/MetadataImporterTest.java

    r4507 r4611  
    165165        assertEquals(1, docs.size());
    166166        SolrInputDocument doc = docs.get(0);
    167         assertEquals("hdl:11858/00-175C-0000-0000-E180-8", getValue(doc, FacetConstants.FIELD_ID));
     167        assertEquals("hdl_58_11858_47_00-175C-0000-0000-E180-8", getValue(doc, FacetConstants.FIELD_ID));
    168168        assertEquals("L'Est R\u00e9publicain : \u00e9dition du 17 mai 1999", getValue(doc, FacetConstants.FIELD_NAME));
    169169        assertEquals("unknown type|http://hdl.handle.net/11858/00-175C-0000-0000-E180-8?urlappend=/TEI", getValue(doc,
Note: See TracChangeset for help on using the changeset viewer.