Changeset 6811


Ignore:
Timestamp:
11/17/15 11:51:11 (9 years ago)
Author:
davor.ostojic@oeaw.ac.at
Message:

#773 Building uniform mapping file format for PostProcessing

Location:
vlo/trunk
Files:
4 deleted
16 edited
4 copied

Legend:

Unmodified
Added
Removed
  • vlo/trunk/pom.xml

    r6436 r6811  
    2525        <module>vlo-commons</module>
    2626        <module>vlo-distribution</module>
     27                <module>vlo-vocabularies</module>
    2728    </modules>
    2829
     
    6869        </repository>
    6970    </repositories>
     71       
    7072</project>
  • vlo/trunk/vlo-commons/src/main/java/eu/clarin/cmdi/vlo/FacetConstants.java

    r6666 r6811  
    2727    public static final String FIELD_NATIONAL_PROJECT = "nationalProject";
    2828    public static final String FIELD_KEYWORDS = "keywords";
     29    public static final String FIELD_PROFILE = "profileName";
     30   
    2931    /**
    3032     * Solr pseudo-field that reveals the ranking score
  • vlo/trunk/vlo-commons/src/main/java/eu/clarin/cmdi/vlo/StringUtils.java

    r6199 r6811  
    44import java.util.HashSet;
    55import java.util.Set;
     6import java.util.regex.Matcher;
     7import java.util.regex.Pattern;
    68
    79public final class StringUtils {
    8 
     10       
    911    /**
    1012     * Converts newlines into HTML (&lt;br/&gt;), not using &lt;p&gt; because it renders differently on firefox/safari/chrome/ie etc...
     
    7173        return normalizedString.toString();
    7274    }
     75   
     76    public static String uncapitalizeFirstLetter(String value){
     77                return value.substring(0, 1).toLowerCase() + value.substring(1);
     78        }
     79   
     80    public static String capitalizeFirstLetter(String value){
     81                return value.substring(0, 1).toUpperCase() + value.substring(1);
     82        }
     83       
     84    public static String createStringFromArray(String... values){
     85        String res = "";
     86       
     87        for(String str: values){
     88                res += str;
     89        }
     90       
     91        return res;
     92       
     93    }
     94   
    7395
    7496}
  • vlo/trunk/vlo-commons/src/main/java/eu/clarin/cmdi/vlo/config/VloConfig.java

    r6676 r6811  
    7272
    7373    private String licenseAvailabilityMapUrl;
     74   
     75    private String resourceClassMapUrl;
    7476
    7577    private String countryComponentUrl = "";
     
    10261028        licenseAvailabilityMapUrl = param;
    10271029    }
     1030   
     1031    public String getResourceClassMapUrl() {
     1032                return resourceClassMapUrl;
     1033        }
     1034
     1035        public void setResourceClassMapUrl(String resourceClassMapUrl) {
     1036                this.resourceClassMapUrl = resourceClassMapUrl;
     1037        }
    10281038
    10291039    /**
  • vlo/trunk/vlo-commons/src/main/resources/VloConfig.xml

    r6676 r6811  
    4040    <handleServerUrl>http://hdl.handle.net/</handleServerUrl>
    4141   
    42     <nationalProjectMapping>/nationalProjectsMapping.xml</nationalProjectMapping>
     42    <nationalProjectMapping>nationalProjectsMapping.xml</nationalProjectMapping>
    4343   
    44     <organisationNamesUrl>/OrganisationControlledVocabulary.xml</organisationNamesUrl>
     44    <organisationNamesUrl>OrganisationControlledVocabulary.xml</organisationNamesUrl>
    4545   
    46     <languageNameVariantsUrl>/LanguageNameVariantsMap.xml</languageNameVariantsUrl>
     46    <languageNameVariantsUrl>LanguageNameVariantsMap.xml</languageNameVariantsUrl>
    4747   
    48     <licenseAvailabilityMapUrl>/LicenseAvailabilityMap.xml</licenseAvailabilityMapUrl>
     48    <licenseAvailabilityMapUrl>LicenseAvailabilityMap.xml</licenseAvailabilityMapUrl>
     49   
     50    <resourceClassMapUrl>resourceTypeMap.xml</resourceClassMapUrl>
    4951   
    5052    <!-- resourceAvailabilityMapUrl
  • vlo/trunk/vlo-commons/src/test/java/eu/clarin/cmdi/vlo/config/DefaultVloConfigFactoryTest.java

    r6474 r6811  
    597597        System.out.println("getNationalProjectMapping");
    598598
    599         String expResult = "/nationalProjectsMapping.xml";
     599        String expResult = "nationalProjectsMapping.xml";
    600600        String result = config.getNationalProjectMapping();
    601601
  • vlo/trunk/vlo-distribution/src/main/assembly/installPackageBuilder.xml

    r6462 r6811  
    6565            </excludes>
    6666        </fileSet>
    67         <!-- config files -->
     67         <!-- uniform maps -->
    6868        <fileSet>
    69             <directory>${project.parent.basedir}/vlo-commons/src/main/resources</directory>
     69            <directory>${project.parent.basedir}/vlo-vocabularies/maps/uniform_maps</directory>
    7070            <includes>
    71                 <include>*.properties</include>
    7271                <include>*.xml</include>
    73                 <include>*.xsl</include>
    7472            </includes>
    75             <excludes>
    76                 <exclude>VloConfig.xml</exclude>
    77                 <exclude>facetConcepts.xml</exclude>
    78             </excludes>
    7973            <outputDirectory>bin</outputDirectory>
    8074        </fileSet>
    81 
     75               
    8276        <!-- the web application files to be packaged -->
    8377        <!-- war file -->
  • vlo/trunk/vlo-importer/pom.xml

    r6436 r6811  
    5353            </resource>
    5454        </resources>
    55         
     55     
    5656        <plugins>
    5757           
     
    8484                <version>2.3.2</version>
    8585                <configuration>
    86                     <source>1.6</source>
    87                     <target>1.6</target>
     86                    <source>1.7</source>
     87                    <target>1.7</target>
    8888                </configuration>
    8989            </plugin>
     
    131131            <groupId>${project.groupId}</groupId>
    132132            <artifactId>vlo-commons</artifactId>
     133            <version>${project.version}</version>
     134        </dependency>
     135        <dependency>
     136            <groupId>${project.groupId}</groupId>
     137            <artifactId>vlo-vocabularies</artifactId>
    133138            <version>${project.version}</version>
    134139        </dependency>
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/AvailabilityPostProcessor.java

    r6149 r6811  
    11package eu.clarin.cmdi.vlo.importer;
    22
    3 import java.io.IOException;
    4 import java.io.InputStream;
    5 import java.net.MalformedURLException;
    63import java.util.ArrayList;
    7 import java.util.HashMap;
    84import java.util.List;
    9 import java.util.Map;
    10 import java.util.regex.Matcher;
    11 import java.util.regex.Pattern;
    12 import javax.xml.parsers.DocumentBuilder;
    13 import javax.xml.parsers.DocumentBuilderFactory;
    14 import javax.xml.parsers.ParserConfigurationException;
    15 import javax.xml.xpath.XPath;
    16 import javax.xml.xpath.XPathConstants;
    17 import javax.xml.xpath.XPathExpressionException;
    18 import javax.xml.xpath.XPathFactory;
    19 import org.w3c.dom.Document;
    20 import org.w3c.dom.Node;
    21 import org.w3c.dom.NodeList;
    22 import org.xml.sax.SAXException;
    235
    246/**
     
    268 * @author teckart
    279 */
    28 public class AvailabilityPostProcessor implements PostProcessor {
    29 
    30     private static Map<String, String> availabilityMap;
    31     private static final Integer MAX_LENGTH = 20;
     10public class AvailabilityPostProcessor extends PostProcessorsWithControlledVocabulary {
     11       
     12        private static final Integer MAX_LENGTH = 20;
    3213    private static final String OTHER_VALUE = "Other";
     14   
    3315
    3416    @Override
    3517    public List<String> process(final String value) {
    36         String result = value;
    3718        List<String> resultList = new ArrayList<String>();
    38 
    39         if (getLicenseAvailabilityMap().containsKey(value.toLowerCase())) {
    40             resultList.add(getLicenseAvailabilityMap().get(value.toLowerCase()));
    41         } else {
    42             if (result.length() > MAX_LENGTH) {
    43                 resultList.add(OTHER_VALUE);
    44             } else {
    45                 resultList.add(result.trim());
    46             }
    47         }
     19       
     20        resultList.add(normalize(value, value.length() > MAX_LENGTH? OTHER_VALUE : value.trim()));
    4821
    4922        return resultList;
    5023    }
    5124
    52     private Map<String, String> getLicenseAvailabilityMap() {
    53         if (availabilityMap == null) {
    54             try {
    55                 // load records from file
    56                 availabilityMap = createControlledVocabularyMap(MetadataImporter.config.getLicenseAvailabilityMapUrl());
    57             } catch (Exception e) {
    58                 throw new RuntimeException("Cannot instantiate postProcessor:", e);
    59             }
    60         }
    61         return availabilityMap;
    62     }
    6325
    64     private Map<String, String> createControlledVocabularyMap(String languageNamesUrl) throws ParserConfigurationException, MalformedURLException, SAXException, XPathExpressionException, IOException {
    65         Map<String, String> result = new HashMap<String, String>();
    66         DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    67         domFactory.setNamespaceAware(true);
    68 
    69         InputStream mappingFileAsStream;
    70         mappingFileAsStream = NationalProjectPostProcessor.class.getResourceAsStream(languageNamesUrl);
    71 
    72         DocumentBuilder builder = domFactory.newDocumentBuilder();
    73         Document doc = builder.parse(mappingFileAsStream);
    74         XPath xpath = XPathFactory.newInstance().newXPath();
    75         NodeList nodeList = (NodeList) xpath.evaluate("//Availability", doc, XPathConstants.NODESET);
    76         for (int i = 0; i < nodeList.getLength(); i++) {
    77             Node node = nodeList.item(i);
    78             String availabilityName = node.getAttributes().getNamedItem("name").getTextContent();
    79             NodeList childNodeList = node.getChildNodes();
    80             for (int j = 0; j < childNodeList.getLength(); j++) {
    81                 String license = childNodeList.item(j).getTextContent().toLowerCase();
    82                 result.put(license, availabilityName);
    83             }
    84         }
    85         return result;
    86     }
     26        @Override
     27        public String getNormalizationMapURL() {
     28                return MetadataImporter.config.getLicenseAvailabilityMapUrl();
     29        }
    8730}
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageCodePostProcessor.java

    r6142 r6811  
    11package eu.clarin.cmdi.vlo.importer;
    22
    3 import eu.clarin.cmdi.vlo.LanguageCodeUtils;
    4 import java.io.IOException;
    5 import java.io.InputStream;
    6 import java.net.MalformedURLException;
     3import java.util.ArrayList;
     4import java.util.List;
     5import java.util.regex.Matcher;
     6import java.util.regex.Pattern;
     7
    78import org.slf4j.Logger;
    89import org.slf4j.LoggerFactory;
    910
    10 import java.util.ArrayList;
    11 import java.util.HashMap;
    12 import java.util.List;
    13 import java.util.Map;
    14 import java.util.regex.Matcher;
    15 import java.util.regex.Pattern;
    16 import javax.xml.parsers.DocumentBuilder;
    17 import javax.xml.parsers.DocumentBuilderFactory;
    18 import javax.xml.parsers.ParserConfigurationException;
    19 import javax.xml.xpath.XPath;
    20 import javax.xml.xpath.XPathConstants;
    21 import javax.xml.xpath.XPathExpressionException;
    22 import javax.xml.xpath.XPathFactory;
    23 import org.w3c.dom.Document;
    24 import org.w3c.dom.Node;
    25 import org.w3c.dom.NodeList;
    26 import org.xml.sax.SAXException;
     11import eu.clarin.cmdi.vlo.LanguageCodeUtils;
    2712
    28 public class LanguageCodePostProcessor implements PostProcessor{
     13public class LanguageCodePostProcessor extends PostProcessorsWithControlledVocabulary{
    2914
    3015    private final static Logger LOG = LoggerFactory.getLogger(LanguageCodePostProcessor.class);
    3116   
    32     private static Map<String, String> languageNameVariantsMap = null;
    33 
    3417    protected static final String CODE_PREFIX = "code:";
    3518    protected static final String LANG_NAME_PREFIX = "name:";
     
    5740        return resultList;
    5841    }
     42   
     43        @Override
     44        public String getNormalizationMapURL() {
     45                return MetadataImporter.config.getLanguageNameVariantsUrl();
     46        }
    5947
    6048    protected String extractLanguageCode(String value) {
     
    6553       
    6654        // map known language name variants to their official name
    67         if(getLanguageNamesVariantsMap().containsKey(result))
    68             result = getLanguageNamesVariantsMap().get(result);
     55        result = normalize(result);
    6956       
    7057        // input is already ISO 639-3?
     
    9683        return result;
    9784    }
    98    
    99         private Map<String, String> getLanguageNamesVariantsMap() {
    100         if (languageNameVariantsMap == null) {
    101             try {
    102                 // load records from file, in the future this should be loaded from CLAVAS directly and the file only used as fallback
    103                 languageNameVariantsMap = createControlledVocabularyMap(MetadataImporter.config.getLanguageNameVariantsUrl());
    104             } catch (Exception e) {
    105                 throw new RuntimeException("Cannot instantiate postProcessor:", e);
    106             }
    107         }
    108         return languageNameVariantsMap;
    109     }
    11085
    111     private Map<String, String> createControlledVocabularyMap(String languageNamesUrl) throws ParserConfigurationException, MalformedURLException, SAXException, XPathExpressionException, IOException {
    112         Map<String, String> result = new HashMap<String, String>();
    113         DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    114         domFactory.setNamespaceAware(true);
    115 
    116         InputStream mappingFileAsStream;
    117         mappingFileAsStream = NationalProjectPostProcessor.class.getResourceAsStream(languageNamesUrl);
    118 
    119         DocumentBuilder builder = domFactory.newDocumentBuilder();
    120         Document doc = builder.parse(mappingFileAsStream);
    121         XPath xpath = XPathFactory.newInstance().newXPath();
    122         NodeList nodeList = (NodeList) xpath.evaluate("//Language", doc, XPathConstants.NODESET);
    123         for (int i = 0; i < nodeList.getLength(); i++) {
    124             Node node = nodeList.item(i);
    125             String languageName = node.getAttributes().getNamedItem("name").getTextContent();
    126             NodeList childNodeList = node.getChildNodes();
    127             for (int j = 0; j < childNodeList.getLength(); j++) {
    128                 String variation = childNodeList.item(j).getTextContent();
    129                 result.put(variation, languageName);
    130             }
    131         }
    132         return result;
    133     }
    13486}
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/NationalProjectPostProcessor.java

    r5754 r6811  
    11package eu.clarin.cmdi.vlo.importer;
    22
    3 import java.io.File;
    4 import java.io.IOException;
    5 import java.io.InputStream;
    63import java.util.ArrayList;
    7 import java.util.HashMap;
    84import java.util.List;
    9 import java.util.Map;
    10 import java.util.regex.Matcher;
    11 import java.util.regex.Pattern;
    12 import javax.xml.parsers.DocumentBuilder;
    13 import javax.xml.parsers.DocumentBuilderFactory;
    14 import javax.xml.xpath.XPath;
    15 import javax.xml.xpath.XPathConstants;
    16 import javax.xml.xpath.XPathExpressionException;
    17 import javax.xml.xpath.XPathFactory;
    18 import org.apache.commons.io.FileUtils;
    19 import org.slf4j.Logger;
    20 import org.slf4j.LoggerFactory;
    21 import org.w3c.dom.Document;
    22 import org.w3c.dom.NodeList;
     5
    236
    247/**
     
    2912 *
    3013 */
    31 public class NationalProjectPostProcessor extends LanguageCodePostProcessor {
     14public class NationalProjectPostProcessor extends PostProcessorsWithControlledVocabulary {
    3215
    33     private final static Logger LOG = LoggerFactory.getLogger(NationalProjectPostProcessor.class);
    34 
    35     private static Map<String, String> nationalProjectMap = null;
    36     private static Map<Pattern, String> nationalProjectRegExpMap = null;
    3716
    3817    /**
     
    4524    @Override
    4625    public List<String> process(String value) {
    47         String input = value.trim();
    4826        List<String> resultList = new ArrayList<String>();
    49 
    50         if (input != null && getMapping().containsKey(input)) {
    51             resultList.add(getMapping().get(input));
    52             return resultList;
    53         }
    54 
    55         for (Pattern pattern : getRegExpMapping().keySet()) {
    56             Matcher matcher = pattern.matcher(input);
    57             if (matcher.find()) {
    58                 resultList.add(getRegExpMapping().get(pattern));
    59                 return resultList;
    60             }
    61         }
    62 
    63         resultList.add("");
     27        resultList.add(normalize(value.trim(), ""));
     28       
    6429        return resultList;
    6530    }
     31   
    6632
    67     private Map<String, String> getMapping() {
    68         if (nationalProjectMap == null) {
    69             getNationalProjectMapping();
    70         }
    71         return nationalProjectMap;
    72     }
    73 
    74     private Map<Pattern, String> getRegExpMapping() {
    75         if (nationalProjectRegExpMap == null) {
    76             getNationalProjectMapping();
    77         }
    78         return nationalProjectRegExpMap;
    79     }
    80 
    81     private void getNationalProjectMapping() {
    82         String projectsMappingFile = MetadataImporter.config.getNationalProjectMapping();
    83 
    84         if (projectsMappingFile.length() == 0) {
    85             // use the packaged project mapping file
    86             projectsMappingFile = "/nationalProjectsMapping.xml";
    87         }
    88 
    89         try {
    90             nationalProjectMap = new HashMap<String, String>();
    91             nationalProjectRegExpMap = new HashMap<Pattern, String>();
    92             DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    93             domFactory.setNamespaceAware(true);
    94 
    95             DocumentBuilder builder = domFactory.newDocumentBuilder();
    96             Document doc;
    97 
    98             // first, try to open the packaged mapping file
    99             InputStream mappingFileAsStream;
    100             mappingFileAsStream = NationalProjectPostProcessor.class.getResourceAsStream(projectsMappingFile);
    101 
    102             if (mappingFileAsStream != null) {
    103                 doc = builder.parse(mappingFileAsStream);
    104             } else {
    105                 // the resource cannot be found inside the package, try outside
    106                 File mappingAsFile;
    107 
    108                 mappingAsFile = new File(projectsMappingFile);
    109 
    110                 if (!mappingAsFile.exists()) {
    111                     LOG.info("National project mapping file does not exist - using minimal test file.");
    112                     mappingAsFile = createMinimalMappingFile();
    113                 }
    114                 doc = builder.parse(mappingAsFile);
    115             }
    116 
    117             XPath xpath = XPathFactory.newInstance().newXPath();
    118             NodeList nodeList = (NodeList) xpath.evaluate("//nationalProjectMapping", doc, XPathConstants.NODESET);
    119             for (int i = 1; i <= nodeList.getLength(); i++) {
    120                 String mdCollectionDisplayName = xpath.evaluate("//nationalProjectMapping[" + i + "]/MdCollectionDisplayName", doc).trim();
    121                 String nationalProject = xpath.evaluate("//nationalProjectMapping[" + i + "]/NationalProject", doc).trim();
    122                 Boolean isRegExp;
    123                 try {
    124                     isRegExp = Boolean.parseBoolean(xpath.evaluate("//nationalProjectMapping[" + i + "]/MdCollectionDisplayName/@isRegExp", doc));
    125                 } catch (XPathExpressionException xee) {
    126                     isRegExp = false;
    127                 }
    128                 if (isRegExp == true) {
    129                     nationalProjectRegExpMap.put(Pattern.compile(mdCollectionDisplayName), nationalProject);
    130                 } else {
    131                     nationalProjectMap.put(mdCollectionDisplayName, nationalProject);
    132                 }
    133             }
    134         } catch (Exception e) {
    135             throw new RuntimeException("Cannot instantiate postProcessor:", e);
    136         }
    137     }
    138 
    139     /**
    140      * Create temporary and minimal mapping file for testing purposes and as
    141      * backup solution
    142      *
    143      * @return minimal file for national projects mapping (e.g. TLA: ANDES ->
    144      * CLARIN-EU)
    145      */
    146     private File createMinimalMappingFile() {
    147         String content = "";
    148         content += "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
    149         content += "<nationalProjects>\n";
    150         content += "   <nationalProjectMapping><MdCollectionDisplayName>TLA: ANDES</MdCollectionDisplayName><NationalProject>CLARIN-EU</NationalProject></nationalProjectMapping>\n";
    151         content += "   <nationalProjectMapping><MdCollectionDisplayName isRegExp=\"true\">Meertens.*</MdCollectionDisplayName><NationalProject>CLARIN-NL</NationalProject></nationalProjectMapping>\n";
    152         content += "</nationalProjects>\n";
    153 
    154         File file = null;
    155         try {
    156             file = File.createTempFile("vlo.nationalTestMapping", ".map");
    157             FileUtils.writeStringToFile(file, content, "UTF-8");
    158         } catch (IOException ioe) {
    159             LOG.error("Could not create temporary national project mapping file");
    160         }
    161 
    162         return file;
    163     }
     33        @Override
     34        public String getNormalizationMapURL() {
     35                return MetadataImporter.config.getNationalProjectMapping();
     36        }
     37   
    16438}
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/OrganisationPostProcessor.java

    r6020 r6811  
    11package eu.clarin.cmdi.vlo.importer;
    22
    3 import java.io.IOException;
    4 import java.io.InputStream;
    5 import java.net.MalformedURLException;
    63import java.util.Arrays;
    7 import java.util.HashMap;
    84import java.util.List;
    9 import java.util.Map;
    10 import javax.xml.parsers.DocumentBuilder;
    11 import javax.xml.parsers.DocumentBuilderFactory;
    12 import javax.xml.parsers.ParserConfigurationException;
    13 import javax.xml.xpath.XPath;
    14 import javax.xml.xpath.XPathConstants;
    15 import javax.xml.xpath.XPathExpressionException;
    16 import javax.xml.xpath.XPathFactory;
    17 import org.w3c.dom.Document;
    18 import org.w3c.dom.Node;
    19 import org.w3c.dom.NodeList;
    20 import org.xml.sax.SAXException;
    215
    22 public class OrganisationPostProcessor implements PostProcessor {
    23 
    24     private static Map<String, String> organisationNamesMap = null;
    25 
     6public class OrganisationPostProcessor extends PostProcessorsWithControlledVocabulary{
     7         
    268    /**
    279     * Splits values for organisation facet at delimiter ';' and replaces
     
    3719        String[] splitArray = normalizeInputString(value).split(";");
    3820        for (int i = 0; i < splitArray.length; i++) {
    39             String orgaName = splitArray[i];
    40             if (getNormalizedOrganisationNamesMap().containsKey(normalizeVariant(orgaName))) {
    41                 splitArray[i] = getNormalizedOrganisationNamesMap().get(normalizeVariant(orgaName));
    42             }
     21                String normalizedVal = normalize(splitArray[i], null);
     22                if(normalizedVal != null)
     23                splitArray[i] = normalizedVal;
    4324        }
    4425       
    4526        return Arrays.asList(splitArray);
    4627    }
     28   
     29        @Override
     30        public String getNormalizationMapURL() {
     31                return MetadataImporter.config.getOrganisationNamesUrl();
     32        }
    4733   
    4834    private String normalizeInputString(String value) {
     
    5440    }
    5541
    56     private Map<String, String> getNormalizedOrganisationNamesMap() {
    57         if (organisationNamesMap == null) {
    58             try {
    59                 // load records from file, in the future this should be loaded from CLAVAS directly and the file only used as fallback
    60                 organisationNamesMap = createControlledVocabularyMap(MetadataImporter.config.getOrganisationNamesUrl());
    61             } catch (Exception e) {
    62                 throw new RuntimeException("Cannot instantiate postProcessor:", e);
    63             }
    64         }
    65         return organisationNamesMap;
    66     }
    6742
    68     private Map<String, String> createControlledVocabularyMap(String urlToVocabularyFile) throws ParserConfigurationException, MalformedURLException, SAXException, XPathExpressionException, IOException {
    69         Map<String, String> result = new HashMap<String, String>();
    70         DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    71         domFactory.setNamespaceAware(true);
    72 
    73         InputStream mappingFileAsStream;
    74         mappingFileAsStream = NationalProjectPostProcessor.class.getResourceAsStream(urlToVocabularyFile);
    75 
    76         DocumentBuilder builder = domFactory.newDocumentBuilder();
    77         Document doc = builder.parse(mappingFileAsStream);
    78         XPath xpath = XPathFactory.newInstance().newXPath();
    79         NodeList nodeList = (NodeList) xpath.evaluate("//Organisation", doc, XPathConstants.NODESET);
    80         for (int i = 0; i < nodeList.getLength(); i++) {
    81             Node node = nodeList.item(i);
    82             String organisationName = node.getAttributes().getNamedItem("name").getTextContent();
    83             NodeList childNodeList = node.getChildNodes();
    84             for (int j = 0; j < childNodeList.getLength(); j++) {
    85                 String variation = normalizeVariant(childNodeList.item(j).getTextContent());
    86                 result.put(variation, organisationName);
    87             }
    88         }
    89         return result;
    90     }
     43       
     44   
     45   
    9146}
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/PostProcessorsWithControlledVocabulary.java

    r6797 r6811  
    44import java.io.FileInputStream;
    55import java.io.InputStream;
     6import java.nio.file.Path;
     7import java.nio.file.Paths;
    68import java.util.Map;
    79
     
    911import org.slf4j.LoggerFactory;
    1012
    11 import eu.clarin.cmdi.vlo.normalization.NormalizationService;
    12 import eu.clarin.cmdi.vlo.normalization.NormalizationVocabulary;
    13 import eu.clarin.cmdi.vlo.normalization.VocabularyEntry;
    14 import eu.clarin.cmdi.vlo.pojo.VariantsMap;
    15 import eu.clarin.cmdi.vlo.transformers.VariantsMapMarshaller;
     13import eu.clarin.cmdi.vlo.normalization.pojo.VariantsMap;
     14import eu.clarin.cmdi.vlo.normalization.service.NormalizationService;
     15import eu.clarin.cmdi.vlo.normalization.service.NormalizationVocabulary;
     16import eu.clarin.cmdi.vlo.normalization.service.VariantsMapMarshaller;
    1617
    1718/*
     
    2526 */
    2627
    27 public abstract class PostProcessorsWithVocabularyMap implements PostProcessor, NormalizationService {
     28public abstract class PostProcessorsWithControlledVocabulary implements PostProcessor, NormalizationService {
    2829
    29         private final static Logger _logger = LoggerFactory.getLogger(PostProcessorsWithVocabularyMap.class);
     30        private final static Logger _logger = LoggerFactory.getLogger(PostProcessorsWithControlledVocabulary.class);
    3031
    3132        private NormalizationVocabulary vocabulary;
     
    6061        protected VariantsMap getMappingFromFile(String mapUrl) {
    6162               
    62 
    63                         _logger.info("Reading vocabulary file from: {}", mapUrl);
    64                         // load records from file
    65                         // in the future this should be loaded from CLAVAS directly and the
    66                         // file only used as fallback
     63                InputStream is = null;
     64                File mapUrlFile = new File(mapUrl);
     65                _logger.info("Reading vocabulary file from: {}", mapUrl);
     66                // load records from file
     67                // in the future this should be loaded from CLAVAS directly and the
     68                // file only used as fallback
     69               
     70               
     71                // try the file first; if it does not exist, fetch it from the classpath (root of the vlo-vocabularies project)
     72                Path p = Paths.get(mapUrl);
     73               
     74                try {
     75                        is = new FileInputStream(mapUrlFile);
     76                } catch (Exception e) {
     77                        _logger.warn("File {} not found, trying to fetch it from classpath ...", mapUrl);
    6778                       
    68                         InputStream is = PostProcessorsWithVocabularyMap.class.getClassLoader().getResourceAsStream(mapUrl);
     79                        is = PostProcessorsWithControlledVocabulary.class.getClassLoader().getResourceAsStream(mapUrlFile.getName());
    6980                        if(is == null)
    7081                                throw new RuntimeException("Cannot instantiate postProcessor, " + mapUrl + " is not on the classpath");
    71                        
    72                         try{
    73                                 return VariantsMapMarshaller.unmarshal(is);
    74                         } catch (Exception e) {
    75                                 throw new RuntimeException("Cannot instantiate postProcessor: ", e);
    76                         }
     82                }
     83               
     84                try{
     85                        return VariantsMapMarshaller.unmarshal(is);
     86                } catch (Exception e) {
     87                        throw new RuntimeException("Cannot instantiate postProcessor: ", e);
     88                }
    7789                       
    7890               
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/ResourceClassPostProcessor.java

    r5984 r6811  
    44import java.util.List;
    55
    6 public class ResourceClassPostProcessor extends LanguageCodePostProcessor {
     6public class ResourceClassPostProcessor implements PostProcessor {
    77    /**
    88     * Postprocess ResourceClass values
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/VLOMarshaller.java

    r6205 r6811  
    22
    33import eu.clarin.cmdi.vlo.config.VloConfig;
     4
    45import java.io.FileInputStream;
    56import java.io.FileNotFoundException;
     
    1112import javax.xml.bind.Marshaller;
    1213import javax.xml.bind.Unmarshaller;
     14
    1315import org.slf4j.Logger;
    1416import org.slf4j.LoggerFactory;
     
    2426     * @return the facet concept mapping
    2527     */
    26     public static FacetConceptMapping getFacetConceptMapping(
    27             String facetConcepts) {
     28    public static FacetConceptMapping getFacetConceptMapping(String facetConcepts) {
     29       
     30        FacetConceptMapping result;
     31        InputStream is = null;
    2832
    29         if (facetConcepts == null || "".equals(facetConcepts)) {
    30             return unmarshal(VLOMarshaller.class.getResourceAsStream(VloConfig.DEFAULT_FACET_CONCEPTS_RESOURCE_FILE));
    31         } else {
    32             try {
    33                 return unmarshal(new FileInputStream(facetConcepts));
    34             } catch (FileNotFoundException ex) {
     33        try {
     34                is = (facetConcepts == null || "".equals(facetConcepts))?
     35                        VLOMarshaller.class.getResourceAsStream(VloConfig.DEFAULT_FACET_CONCEPTS_RESOURCE_FILE) :
     36                        new FileInputStream(facetConcepts);
     37        } catch (FileNotFoundException e) {
    3538                logger.error("Could not find facets file: {}", facetConcepts);
    3639                return null;
    37             }
    3840        }
     41       
     42        return unmarshal(is);
    3943    }
     44   
    4045
    4146    /**
     
    4651     */
    4752    static FacetConceptMapping unmarshal(InputStream inputStream) {
    48         try {
    49             JAXBContext jc = JAXBContext.newInstance(FacetConceptMapping.class);
    50             Unmarshaller u = jc.createUnmarshaller();
    51             FacetConceptMapping result = (FacetConceptMapping) u.unmarshal(inputStream);
    52             result.check();
    53             return result;
    54         } catch (JAXBException e) {
    55             throw new RuntimeException(e);
    56         }
     53        FacetConceptMapping result;
     54       
     55                try {                   
     56                        JAXBContext jc = JAXBContext.newInstance(FacetConceptMapping.class);
     57                Unmarshaller u = jc.createUnmarshaller();
     58                result = (FacetConceptMapping) u.unmarshal(inputStream);       
     59                } catch (JAXBException e) {
     60                        throw new RuntimeException();
     61                }
     62               
     63                result.check();
     64            return result;
    5765    }
     66   
    5867
    5968    /**
  • vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/ImporterTestcase.java

    r6208 r6811  
    1818    private final VloConfigFactory configFactory = new DefaultVloConfigFactory();
    1919    protected VloConfig config;
     20    private char ch = 'a';
    2021
    2122    @Rule
     
    2324
    2425    protected File createCmdiFile(String name, String content) throws IOException {
    25         File file = tempFolder.newFile(name + System.currentTimeMillis() + ".cmdi");
     26        File file = tempFolder.newFile(name + System.currentTimeMillis() + "_" + ch++ + ".cmdi");
    2627        FileUtils.writeStringToFile(file, content, "UTF-8");
    2728        return file;
  • vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/NationalProjectPostProcessorTest.java

    r5754 r6811  
    33import eu.clarin.cmdi.vlo.config.DefaultVloConfigFactory;
    44import static org.junit.Assert.assertEquals;
     5
    56import org.junit.Before;
    67import org.junit.Test;
    78
    89public class NationalProjectPostProcessorTest extends ImporterTestcase {
    9 
    10     @Before
    11     public void setUp() throws Exception {
    12         new DefaultVloConfigFactory().newConfig();
    13     }
    1410
    1511    @Test
Note: See TracChangeset for help on using the changeset viewer.