Changeset 5979
- Timestamp:
- 02/16/15 13:42:07 (9 years ago)
- Location:
- vlo/trunk/vlo-importer/src
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIParserVTDXML.java
r5849 r5979 15 15 import java.util.List; 16 16 import java.util.Map; 17 import java.util.regex.Matcher; 18 import java.util.regex.Pattern; 17 19 import org.apache.commons.io.IOUtils; 18 20 import org.slf4j.Logger; … … 21 23 public class CMDIParserVTDXML implements CMDIDataProcessor { 22 24 private final Map<String, PostProcessor> postProcessors; 25 private final Boolean useLocalXSDCache; 26 private static final Pattern PROFILE_ID_PATTERN = Pattern.compile(".*(clarin.eu:cr1:p_[0-9]+).*"); 23 27 private final static Logger LOG = LoggerFactory.getLogger(CMDIParserVTDXML.class); 24 28 25 29 private static final String DEFAULT_LANGUAGE = "und"; 26 30 27 public CMDIParserVTDXML(Map<String, PostProcessor> postProcessors ) {31 public CMDIParserVTDXML(Map<String, PostProcessor> postProcessors, Boolean useLocalXSDCache) { 28 32 this.postProcessors = postProcessors; 33 this.useLocalXSDCache = useLocalXSDCache; 29 34 } 30 35 … … 39 44 40 45 VTDNav nav = vg.getNav(); 41 FacetMapping facetMapping = getFacetMapping(nav.cloneNav() , file.getAbsolutePath());46 FacetMapping facetMapping = getFacetMapping(nav.cloneNav()); 42 47 43 48 if(facetMapping.getFacets().isEmpty()){ … … 62 67 * Extracts valid XML patterns for all facet definitions 63 68 * @param nav VTD Navigator 64 * @param cmdiFilePath Absolute path of the XML file for which nav was created65 69 * @return the facet mapping used to map meta data to facets 66 70 * @throws VTDException 67 71 */ 68 private FacetMapping getFacetMapping(VTDNav nav , String cmdiFilePath) throws VTDException {69 String xsd = extractXsd(nav);70 if ( xsd == null) {72 private FacetMapping getFacetMapping(VTDNav nav) throws VTDException { 73 String profileId = extractXsd(nav); 74 if (profileId == null) { 71 75 throw new RuntimeException("Cannot get xsd schema so cannot get a proper mapping. Parse failed!"); 72 }73 if (xsd.indexOf("http") != xsd.lastIndexOf("http")){74 LOG.info("No valid CMDI schema URL was extracted. This is an indication of a broken CMDI file (like false content in //MdProfile element). {}", cmdiFilePath);75 76 } 76 77 String facetConceptsFile = MetadataImporter.config.getFacetConceptsFile(); … … 79 80 facetConceptsFile = "/facetConcepts.xml"; 80 81 } 81 return FacetMappingFactory.getFacetMapping(facetConceptsFile, xsd);82 return FacetMappingFactory.getFacetMapping(facetConceptsFile, profileId, useLocalXSDCache); 82 83 } 83 84 … … 85 86 * Try two approaches to extract the XSD schema information from the CMDI file 86 87 * @param nav VTD Navigator 87 * @return URLof CMDI schema, or null if neither the CMDI header nor the XMLSchema-instance's attributes contained the information88 * @return ID of CMDI schema, or null if neither the CMDI header nor the XMLSchema-instance's attributes contained the information 88 89 * @throws VTDException 89 90 */ 90 91 String extractXsd(VTDNav nav) throws VTDException { 91 String xsd = getXsdFromHeader(nav);92 if ( xsd== null) {93 xsd = getXsdFromSchemaLocation(nav);94 } 95 return xsd;92 String profileID = getProfileIdFromHeader(nav); 93 if (profileID == null) { 94 profileID = getProfileIdFromSchemaLocation(nav); 95 } 96 return profileID; 96 97 } 97 98 … … 99 100 * Extract XSD schema information from CMDI header (using element //Header/MdProfile) 100 101 * @param nav VTD Navigator 101 * @return URL toCMDI schema, or null if content of //Header/MdProfile element could not be read102 * @return ID of CMDI schema, or null if content of //Header/MdProfile element could not be read 102 103 * @throws XPathParseException 103 104 * @throws XPathEvalException 104 105 * @throws NavException 105 106 */ 106 private String getXsdFromHeader(VTDNav nav) throws XPathParseException, XPathEvalException, NavException { 107 String result = null; 107 private String getProfileIdFromHeader(VTDNav nav) throws XPathParseException, XPathEvalException, NavException { 108 108 nav.toElement(VTDNav.ROOT); 109 109 AutoPilot ap = new AutoPilot(nav); … … 111 111 ap.selectXPath("/c:CMD/c:Header/c:MdProfile/text()"); 112 112 int index = ap.evalXPath(); 113 String profileId = null; 113 114 if (index != -1) { 114 String profileId = nav.toString(index).trim(); 115 result = MetadataImporter.config.getComponentRegistryProfileSchema(profileId); 116 } 117 return result; 115 profileId = nav.toString(index).trim(); 116 } 117 return profileId; 118 118 } 119 119 … … 121 121 * Extract XSD schema information from schemaLocation or noNamespaceSchemaLocation attributes 122 122 * @param nav VTD Navigator 123 * @return URL toCMDI schema, or null if attributes don't exist123 * @return ID of CMDI schema, or null if attributes don't exist 124 124 * @throws NavException 125 125 */ 126 private String get XsdFromSchemaLocation(VTDNav nav) throws NavException {126 private String getProfileIdFromSchemaLocation(VTDNav nav) throws NavException { 127 127 String result = null; 128 128 nav.toElement(VTDNav.ROOT); … … 137 137 } 138 138 } 139 return result; 139 140 // extract profile ID 141 if(result != null) { 142 Matcher m = PROFILE_ID_PATTERN.matcher(result); 143 if(m.find()) 144 return m.group(1); 145 } 146 return null; 140 147 } 141 148 -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/FacetMappingFactory.java
r5197 r5979 35 35 private FacetMappingFactory() { 36 36 } 37 38 public static FacetMapping getFacetMapping(String facetConceptsFile, String xsd ) {39 return INSTANCE.getOrCreateMapping(facetConceptsFile, xsd );37 38 public static FacetMapping getFacetMapping(String facetConceptsFile, String xsd, Boolean useLocalXSDCache) { 39 return INSTANCE.getOrCreateMapping(facetConceptsFile, xsd, useLocalXSDCache); 40 40 } 41 41 … … 48 48 * @param facetConcepts name of the facet concepts file 49 49 * @param xsd url of xml schema of cmdi profile 50 * @param useLocalXSDCache use local XML schema files instead of accessing the component registry 50 51 * 51 52 * @return facet concept mapping 52 53 */ 53 private FacetMapping getOrCreateMapping(String facetConcepts, String xsd ) {54 private FacetMapping getOrCreateMapping(String facetConcepts, String xsd, Boolean useLocalXSDCache) { 54 55 // check if concept mapping has already been created 55 56 FacetMapping result = mapping.get(xsd); 56 57 if (result == null) { 57 result = createMapping(facetConcepts, xsd );58 result = createMapping(facetConcepts, xsd, useLocalXSDCache); 58 59 mapping.put(xsd, result); 59 60 } … … 69 70 * @param facetConcepts name of the facet concepts file 70 71 * @param xsd url of xml schema of cmdi profile 72 * @param useLocalXSDCache use local XML schema files instead of accessing the component registry 71 73 * 72 74 * @return the facet mapping used to map meta data to facets 73 75 */ 74 private FacetMapping createMapping(String facetConcepts, String xsd ) {76 private FacetMapping createMapping(String facetConcepts, String xsd, Boolean useLocalXSDCache) { 75 77 76 78 FacetMapping result = new FacetMapping(); … … 79 81 try { 80 82 //The magic 81 Map<String, List<String>> conceptLinkPathMapping = createConceptLinkPathMapping(xsd );83 Map<String, List<String>> conceptLinkPathMapping = createConceptLinkPathMapping(xsd, useLocalXSDCache); 82 84 Map<String, String> pathConceptLinkMapping = null; 83 85 // Below we put the stuff we found into the configuration class. … … 211 213 * 212 214 * @param xsd URL of XML Schema of some CMDI profile 215 * @param useLocalXSDCache use local XML schema files instead of accessing the component registry 213 216 * @return Map (Data Category -> List of XPath expressions linked to the key 214 217 * data category which can be found in CMDI files with this schema) 215 218 * @throws NavException 216 219 */ 217 private Map<String, List<String>> createConceptLinkPathMapping(String xsd ) throws NavException {220 private Map<String, List<String>> createConceptLinkPathMapping(String xsd, Boolean useLocalXSDCache) throws NavException { 218 221 Map<String, List<String>> result = new HashMap<String, List<String>>(); 219 222 VTDGen vg = new VTDGen(); 220 boolean parseSuccess = vg.parseHttpUrl(xsd, true); 223 boolean parseSuccess; 224 if(useLocalXSDCache) { 225 parseSuccess = vg.parseFile(Thread.currentThread().getContextClassLoader().getResource("testProfiles/"+xsd+".xsd").getPath(), true); 226 } else { 227 parseSuccess = vg.parseHttpUrl(MetadataImporter.config.getComponentRegistryProfileSchema(xsd), true); 228 } 229 221 230 if (!parseSuccess) { 222 231 LOG.error("Cannot create ConceptLink Map from xsd (xsd is probably not reachable): " + xsd + ". All metadata instances that use this xsd will not be imported correctly."); -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java
r5896 r5979 137 137 LOG.info("Deleting data of provider done."); 138 138 } 139 CMDIDataProcessor processor = new CMDIParserVTDXML(POST_PROCESSORS );139 CMDIDataProcessor processor = new CMDIParserVTDXML(POST_PROCESSORS, false); 140 140 List<File> files = getFilesFromDataRoot(dataRoot.getRootFile()); 141 141 for (File file : files) { -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDataProcessorTest.java
r5887 r5979 17 17 18 18 private CMDIDataProcessor getDataParser() { 19 return new CMDIParserVTDXML(MetadataImporter.POST_PROCESSORS );19 return new CMDIParserVTDXML(MetadataImporter.POST_PROCESSORS, true); 20 20 } 21 21 -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIParserVTDXMLTest.java
r5347 r5979 19 19 content += "</CMD>\n"; 20 20 String xsd = getXsd(content); 21 assertEquals(" http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1288172614026/xsd", xsd);21 assertEquals("clarin.eu:cr1:p_1288172614026", xsd); 22 22 } 23 23 … … 30 30 content += "</CMD>\n"; 31 31 String xsd = getXsd(content); 32 assertEquals(" http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1288172614026/xsd", xsd);32 assertEquals("clarin.eu:cr1:p_1288172614026", xsd); 33 33 } 34 34 … … 41 41 content += "</CMD>\n"; 42 42 String xsd = getXsd(content); 43 assertEquals( "http://www.meertens.knaw.nl/oai/cmdi/diddd_sub_location_profile.xsd", xsd);43 assertEquals(null, xsd); 44 44 } 45 45 … … 59 59 vg.parse(true); 60 60 VTDNav nav = vg.getNav(); 61 CMDIParserVTDXML parser = new CMDIParserVTDXML(null );61 CMDIParserVTDXML parser = new CMDIParserVTDXML(null, true); 62 62 String xsd = parser.extractXsd(nav); 63 63 return xsd; -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/FacetMappingFactoryTest.java
r5143 r5979 16 16 private final static String FACETCONCEPTS_FILENAME = "/facetConceptsTest.xml"; 17 17 18 private final static String IMDI_PROFILE_URL = 19 "http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd"; 20 private final static String OLAC_PROFILE_URL = 21 "http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1288172614026/xsd"; 22 private final static String LRT_PROFILE_URL = 23 "http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1289827960126/xsd"; 24 private final static String ID_PROFILE_URL = 25 "http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1290431694629/xsd"; 26 private final static String TEXTCORPUSPROFILE_PROFILE_URL = 27 "http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1290431694580/xsd"; 18 private final static String IMDI_PROFILE_ID = "clarin.eu:cr1:p_1271859438204"; 19 private final static String OLAC_PROFILE_ID = "clarin.eu:cr1:p_1288172614026"; 20 private final static String LRT_PROFILE_ID = "clarin.eu:cr1:p_1289827960126"; 21 private final static String ID_PROFILE_ID = "clarin.eu:cr1:p_1290431694629"; 22 private final static String TEXTCORPUSPROFILE_PROFILE_ID = "clarin.eu:cr1:p_1290431694580"; 28 23 29 24 private final static Logger LOG = LoggerFactory.getLogger(FacetMappingFactoryTest.class); … … 32 27 public void testGetImdiMapping() { 33 28 FacetMapping facetMapping = FacetMappingFactory 34 .getFacetMapping(FACETCONCEPTS_FILENAME, IMDI_PROFILE_ URL);29 .getFacetMapping(FACETCONCEPTS_FILENAME, IMDI_PROFILE_ID, true); 35 30 36 31 List<FacetConfiguration> facets = facetMapping.getFacets(); … … 182 177 public void testGetOlacMapping() { 183 178 FacetMapping facetMapping = FacetMappingFactory 184 .getFacetMapping(FACETCONCEPTS_FILENAME, OLAC_PROFILE_ URL);179 .getFacetMapping(FACETCONCEPTS_FILENAME, OLAC_PROFILE_ID, true); 185 180 186 181 List<FacetConfiguration> facets = facetMapping.getFacets(); … … 311 306 public void testGetLrtMapping() { 312 307 FacetMapping facetMapping = FacetMappingFactory 313 .getFacetMapping(FACETCONCEPTS_FILENAME, LRT_PROFILE_ URL);308 .getFacetMapping(FACETCONCEPTS_FILENAME, LRT_PROFILE_ID, true); 314 309 315 310 List<FacetConfiguration> facets = facetMapping.getFacets(); … … 439 434 440 435 FacetMapping facetMapping = FacetMappingFactory 441 .getFacetMapping(FACETCONCEPTS_FILENAME, ID_PROFILE_ URL);436 .getFacetMapping(FACETCONCEPTS_FILENAME, ID_PROFILE_ID, true); 442 437 443 438 List<FacetConfiguration> facets = facetMapping.getFacets(); … … 468 463 public void testStringBasedBlacklisting() { 469 464 FacetMapping facetMapping = FacetMappingFactory 470 .getFacetMapping(FACETCONCEPTS_FILENAME, TEXTCORPUSPROFILE_PROFILE_ URL);465 .getFacetMapping(FACETCONCEPTS_FILENAME, TEXTCORPUSPROFILE_PROFILE_ID, true); 471 466 List<FacetConfiguration> facets = facetMapping.getFacets(); 472 467 -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/MetadataImporterTest.java
r5197 r5979 271 271 dataRoot.getOriginName()); 272 272 CMDIDataProcessor processor = new 273 CMDIParserVTDXML(POST_PROCESSORS );273 CMDIParserVTDXML(POST_PROCESSORS, true); 274 274 List<File> files = 275 275 getFilesFromDataRoot(dataRoot.getRootFile());
Note: See TracChangeset
for help on using the changeset viewer.