Changeset 6811 for vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/OrganisationPostProcessor.java
- Timestamp: 11/17/15 11:51:11 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/OrganisationPostProcessor.java
r6020 r6811 1 1 package eu.clarin.cmdi.vlo.importer; 2 2 3 import java.io.IOException;4 import java.io.InputStream;5 import java.net.MalformedURLException;6 3 import java.util.Arrays; 7 import java.util.HashMap;8 4 import java.util.List; 9 import java.util.Map;10 import javax.xml.parsers.DocumentBuilder;11 import javax.xml.parsers.DocumentBuilderFactory;12 import javax.xml.parsers.ParserConfigurationException;13 import javax.xml.xpath.XPath;14 import javax.xml.xpath.XPathConstants;15 import javax.xml.xpath.XPathExpressionException;16 import javax.xml.xpath.XPathFactory;17 import org.w3c.dom.Document;18 import org.w3c.dom.Node;19 import org.w3c.dom.NodeList;20 import org.xml.sax.SAXException;21 5 22 public class OrganisationPostProcessor implements PostProcessor { 23 24 private static Map<String, String> organisationNamesMap = null; 25 6 public class OrganisationPostProcessor extends PostProcessorsWithControlledVocabulary{ 7 26 8 /** 27 9 * Splits values for organisation facet at delimiter ';' and replaces … … 37 19 String[] splitArray = normalizeInputString(value).split(";"); 38 20 for (int i = 0; i < splitArray.length; i++) { 39 String orgaName = splitArray[i]; 40 if (getNormalizedOrganisationNamesMap().containsKey(normalizeVariant(orgaName))) { 41 splitArray[i] = getNormalizedOrganisationNamesMap().get(normalizeVariant(orgaName)); 42 } 21 String normalizedVal = normalize(splitArray[i], null); 22 if(normalizedVal != null) 23 splitArray[i] = normalizedVal; 43 24 } 44 25 45 26 return Arrays.asList(splitArray); 46 27 } 28 29 @Override 30 public String getNormalizationMapURL() { 31 return MetadataImporter.config.getOrganisationNamesUrl(); 32 } 47 33 48 34 private String normalizeInputString(String value) { … … 54 40 } 55 41 56 private Map<String, String> getNormalizedOrganisationNamesMap() {57 if (organisationNamesMap == null) {58 try {59 // load records from file, in the future this should be loaded from CLAVAS directly and the file only used as fallback60 
organisationNamesMap = createControlledVocabularyMap(MetadataImporter.config.getOrganisationNamesUrl());61 } catch (Exception e) {62 throw new RuntimeException("Cannot instantiate postProcessor:", e);63 }64 }65 return organisationNamesMap;66 }67 42 68 private Map<String, String> createControlledVocabularyMap(String urlToVocabularyFile) throws ParserConfigurationException, MalformedURLException, SAXException, XPathExpressionException, IOException { 69 Map<String, String> result = new HashMap<String, String>(); 70 DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); 71 domFactory.setNamespaceAware(true); 72 73 InputStream mappingFileAsStream; 74 mappingFileAsStream = NationalProjectPostProcessor.class.getResourceAsStream(urlToVocabularyFile); 75 76 DocumentBuilder builder = domFactory.newDocumentBuilder(); 77 Document doc = builder.parse(mappingFileAsStream); 78 XPath xpath = XPathFactory.newInstance().newXPath(); 79 NodeList nodeList = (NodeList) xpath.evaluate("//Organisation", doc, XPathConstants.NODESET); 80 for (int i = 0; i < nodeList.getLength(); i++) { 81 Node node = nodeList.item(i); 82 String organisationName = node.getAttributes().getNamedItem("name").getTextContent(); 83 NodeList childNodeList = node.getChildNodes(); 84 for (int j = 0; j < childNodeList.getLength(); j++) { 85 String variation = normalizeVariant(childNodeList.item(j).getTextContent()); 86 result.put(variation, organisationName); 87 } 88 } 89 return result; 90 } 43 44 45 91 46 }
Note: See TracChangeset for help on using the changeset viewer.