Changeset 6811 for vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/AvailabilityPostProcessor.java
- Timestamp: 11/17/15 11:51:11 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/AvailabilityPostProcessor.java
--- vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/AvailabilityPostProcessor.java	(r6149)
+++ vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/AvailabilityPostProcessor.java	(r6811)
@@ -1,24 +1,6 @@
 package eu.clarin.cmdi.vlo.importer;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.MalformedURLException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpressionException;
-import javax.xml.xpath.XPathFactory;
-import org.w3c.dom.Document;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-import org.xml.sax.SAXException;
 
 /**
@@ -26,62 +8,23 @@
  * @author teckart
  */
-public class AvailabilityPostProcessor implements PostProcessor {
-
-    private static Map<String, String> availabilityMap;
-    private static final Integer MAX_LENGTH = 20;
+public class AvailabilityPostProcessor extends PostProcessorsWithControlledVocabulary {
+
+    private static final Integer MAX_LENGTH = 20;
     private static final String OTHER_VALUE = "Other";
+
 
     @Override
     public List<String> process(final String value) {
-        String result = value;
         List<String> resultList = new ArrayList<String>();
-
-        if (getLicenseAvailabilityMap().containsKey(value.toLowerCase())) {
-            resultList.add(getLicenseAvailabilityMap().get(value.toLowerCase()));
-        } else {
-            if (result.length() > MAX_LENGTH) {
-                resultList.add(OTHER_VALUE);
-            } else {
-                resultList.add(result.trim());
-            }
-        }
+
+        resultList.add(normalize(value, value.length() > MAX_LENGTH? OTHER_VALUE : value.trim()));
 
         return resultList;
     }
 
-    private Map<String, String> getLicenseAvailabilityMap() {
-        if (availabilityMap == null) {
-            try {
-                // load records from file
-                availabilityMap = createControlledVocabularyMap(MetadataImporter.config.getLicenseAvailabilityMapUrl());
-            } catch (Exception e) {
-                throw new RuntimeException("Cannot instantiate postProcessor:", e);
-            }
-        }
-        return availabilityMap;
-    }
 
-    private Map<String, String> createControlledVocabularyMap(String languageNamesUrl) throws ParserConfigurationException, MalformedURLException, SAXException, XPathExpressionException, IOException {
-        Map<String, String> result = new HashMap<String, String>();
-        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
-        domFactory.setNamespaceAware(true);
-
-        InputStream mappingFileAsStream;
-        mappingFileAsStream = NationalProjectPostProcessor.class.getResourceAsStream(languageNamesUrl);
-
-        DocumentBuilder builder = domFactory.newDocumentBuilder();
-        Document doc = builder.parse(mappingFileAsStream);
-        XPath xpath = XPathFactory.newInstance().newXPath();
-        NodeList nodeList = (NodeList) xpath.evaluate("//Availability", doc, XPathConstants.NODESET);
-        for (int i = 0; i < nodeList.getLength(); i++) {
-            Node node = nodeList.item(i);
-            String availabilityName = node.getAttributes().getNamedItem("name").getTextContent();
-            NodeList childNodeList = node.getChildNodes();
-            for (int j = 0; j < childNodeList.getLength(); j++) {
-                String license = childNodeList.item(j).getTextContent().toLowerCase();
-                result.put(license, availabilityName);
-            }
-        }
-        return result;
-    }
+    @Override
+    public String getNormalizationMapURL() {
+        return MetadataImporter.config.getLicenseAvailabilityMapUrl();
+    }
 }
Note: See TracChangeset for help on using the changeset viewer.