Ignore:
Timestamp:
11/17/15 11:51:11 (9 years ago)
Author:
davor.ostojic@oeaw.ac.at
Message:

#773 Building uniform mapping file format for PostProcessing?

File:
1 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/OrganisationPostProcessor.java

    r6020 r6811
     1  1 package eu.clarin.cmdi.vlo.importer;
     2  2
     3    import java.io.IOException;
     4    import java.io.InputStream;
     5    import java.net.MalformedURLException;
     6  3 import java.util.Arrays;
     7    import java.util.HashMap;
     8  4 import java.util.List;
     9    import java.util.Map;
    10    import javax.xml.parsers.DocumentBuilder;
    11    import javax.xml.parsers.DocumentBuilderFactory;
    12    import javax.xml.parsers.ParserConfigurationException;
    13    import javax.xml.xpath.XPath;
    14    import javax.xml.xpath.XPathConstants;
    15    import javax.xml.xpath.XPathExpressionException;
    16    import javax.xml.xpath.XPathFactory;
    17    import org.w3c.dom.Document;
    18    import org.w3c.dom.Node;
    19    import org.w3c.dom.NodeList;
    20    import org.xml.sax.SAXException;
    21  5
    22    public class OrganisationPostProcessor implements PostProcessor {
    23
    24        private static Map<String, String> organisationNamesMap = null;
    25
        6 public class OrganisationPostProcessor extends PostProcessorsWithControlledVocabulary{
        7
    26  8     /**
    27  9      * Splits values for organisation facet at delimiter ';' and replaces
    …
    37 19         String[] splitArray = normalizeInputString(value).split(";");
    38 20         for (int i = 0; i < splitArray.length; i++) {
    39                String orgaName = splitArray[i];
    40                if (getNormalizedOrganisationNamesMap().containsKey(normalizeVariant(orgaName))) {
    41                    splitArray[i] = getNormalizedOrganisationNamesMap().get(normalizeVariant(orgaName));
    42                }
       21                 String normalizedVal = normalize(splitArray[i], null);
       22                 if(normalizedVal != null)
       23                 splitArray[i] = normalizedVal;
    43 24         }
    44 25
    45 26         return Arrays.asList(splitArray);
    46 27     }
       28
       29         @Override
       30         public String getNormalizationMapURL() {
       31                 return MetadataImporter.config.getOrganisationNamesUrl();
       32         }
    47 33
    48 34     private String normalizeInputString(String value) {
    …
    54 40     }
    55 41
    56        private Map<String, String> getNormalizedOrganisationNamesMap() {
    57            if (organisationNamesMap == null) {
    58                try {
    59                    // load records from file, in the future this should be loaded from CLAVAS directly and the file only used as fallback
    60                    organisationNamesMap = createControlledVocabularyMap(MetadataImporter.config.getOrganisationNamesUrl());
    61                } catch (Exception e) {
    62                    throw new RuntimeException("Cannot instantiate postProcessor:", e);
    63                }
    64            }
    65            return organisationNamesMap;
    66        }
    67 42
    68        private Map<String, String> createControlledVocabularyMap(String urlToVocabularyFile) throws ParserConfigurationException, MalformedURLException, SAXException, XPathExpressionException, IOException {
    69            Map<String, String> result = new HashMap<String, String>();
    70            DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    71            domFactory.setNamespaceAware(true);
    72
    73            InputStream mappingFileAsStream;
    74            mappingFileAsStream = NationalProjectPostProcessor.class.getResourceAsStream(urlToVocabularyFile);
    75
    76            DocumentBuilder builder = domFactory.newDocumentBuilder();
    77            Document doc = builder.parse(mappingFileAsStream);
    78            XPath xpath = XPathFactory.newInstance().newXPath();
    79            NodeList nodeList = (NodeList) xpath.evaluate("//Organisation", doc, XPathConstants.NODESET);
    80            for (int i = 0; i < nodeList.getLength(); i++) {
    81                Node node = nodeList.item(i);
    82                String organisationName = node.getAttributes().getNamedItem("name").getTextContent();
    83                NodeList childNodeList = node.getChildNodes();
    84                for (int j = 0; j < childNodeList.getLength(); j++) {
    85                    String variation = normalizeVariant(childNodeList.item(j).getTextContent());
    86                    result.put(variation, organisationName);
    87                }
    88            }
    89            return result;
    90        }
       43
       44
       45
    91 46 }
Note: See TracChangeset for help on using the changeset viewer.