Changeset 6020
Legend:
- Unmodified
- Added
- Removed
-
vlo/trunk/vlo-commons/src/main/resources/OrganisationControlledVocabulary.xml
r5885 r6020 761 761 <Variation>University of Helsinki, Department of General Linguistics.</Variation> 762 762 </Organisation> 763 <Organisation name="MPI for Psycholinguistics, MPG">764 <Variation>MPI</Variation>765 <Variation>Max Planck Institute for Psycholinguistics / MPG</Variation>766 <Variation>MPI for Psycholinguistics</Variation>767 <Variation>Max Planck Institute for Psycholinguistics</Variation>768 <Variation>MPI Nijmegen</Variation>769 </Organisation>770 763 <Organisation name="Heidelberger Akademie der Wissenschaften, University of Heidelberg"> 771 764 <Variation>University of Heidelberg, Heidelberger Akademie der Wissenschaften</Variation> … … 1488 1481 <Organisation name="MPI"> 1489 1482 <Variation>Max Planck Institute</Variation> 1483 <Variation>Max Planck Institut</Variation> 1490 1484 </Organisation> 1491 1485 <Organisation name="Finanstilsynet, OEM"/> … … 1868 1862 <Variation>Université Paris X- Nanterre</Variation> 1869 1863 <Variation>Université Paris X Nanterre</Variation> 1870 <Variation>Max Planck Institute for Evolutionary Anthropology</Variation>1871 1864 </Organisation> 1872 1865 <Organisation name="Goethe University Frankfurt, Institute of Empirical Linguistics"> … … 1879 1872 <Organisation name="Max Planck Institute for Evolutionary Anthropology"> 1880 1873 <Variation>Max-Planck-Institute for Evolutionary Anthropology</Variation> 1874 <Variation>Max Planck Institute for Evolutionary Anthropology</Variation> 1881 1875 </Organisation> 1882 1876 <Organisation name="National Foreign Language Resource Center, University of Hawai?i"/> … … 2063 2057 <Variation>Max Planck Institute for Psycolinguistics</Variation> 2064 2058 <Variation>Max Planck Institute for Psycholinguisticsc</Variation> 2059 <Variation>Max Planck Institut fuer Psycholinguistik, Nijmegen, Nl.</Variation> 2060 <Variation>Max-Planck-Institut fÃŒr Psycholinguistik</Variation> 2061 <Variation>Max Planck Institute for Psycholinguistics / MPG</Variation> 2062 <Variation>MPI for 
Psycholinguistics</Variation> 2063 <Variation>MPI for psycholinguistics, Nijmegen</Variation> 2064 <Variation>Max Planck Institute for Psycholinguistics</Variation> 2065 <Variation>MPI Nijmegen</Variation> 2066 <Variation>Max-Planck-Institut fÃÂŒr Psycholinguistik</Variation> 2067 <Variation>Max Planck for Psycholinguisics, Nijmegen, NL</Variation> 2068 <Variation>Max-Planck-InstitÃŒt fÃŒr Psycholinguïstik</Variation> 2069 <Variation>Max-Planck-InstitÃŒt fÃŒr Psycholinguïstiek</Variation> 2070 <Variation>Mac Planck Institute for Psycholinguistics</Variation> 2071 <Variation>Max Planck Institut fÃÂŒr Psychlinguistik</Variation> 2072 <Variation>Max Planck Institut fÃÂÃÂŒr Psychlinguistik</Variation> 2073 <Variation>Max Planck Institute for Psycho-Linguistics</Variation> 2074 <Variation>MPI for Psycholinguistics (Nijmegen, Netherlands)</Variation> 2075 <Variation>Max Plank Institute for Psycholinguistics</Variation> 2076 <Variation>MPI for Psycholinguistics Nijmegen</Variation> 2077 <Variation>Max-Planck-InstitÃŒt fÃŒr Psucholinguïstik</Variation> 2078 <Variation>Max-Planck-InstitÃŒt fÃŒr Psychlinguïstik</Variation> 2065 2079 </Organisation> 2066 2080 <Organisation name="Witwatersrand University Press"> … … 2135 2149 <Organisation name="Institute of Cybernetics, Tallinn University of Technology"> 2136 2150 <Variation>Institute of Cybernetics at Tallinn University of Technology</Variation> 2137 </Organisation>2138 <Organisation name="MPI fÃŒr Psycholinguistik">2139 <Variation>Max Planck Institut fuer Psycholinguistik, Nijmegen, Nl.</Variation>2140 <Variation>Max-Planck-Institut fÃŒr Psycholinguistik</Variation>2141 2151 </Organisation> 2142 2152 <Organisation name="Sheldon Press"> -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/OrganisationPostProcessor.java
r6001 r6020 35 35 @Override 36 36 public List<String> process(String value) { 37 String[] splitArray = normalizeString(value).split(";"); 37 String[] splitArray = normalizeInputString(value).split(";"); 38 38 for (int i = 0; i < splitArray.length; i++) { 39 39 String orgaName = splitArray[i]; 40 if (getNormalizedOrganisationNamesMap().containsKey(orgaName)) { 41 splitArray[i] = getNormalizedOrganisationNamesMap().get(orgaName); 40 if (getNormalizedOrganisationNamesMap().containsKey(normalizeVariant(orgaName))) { 41 splitArray[i] = getNormalizedOrganisationNamesMap().get(normalizeVariant(orgaName)); 42 42 } 43 43 } … … 46 46 } 47 47 48 private String normalizeString(String value) { 48 private String normalizeInputString(String value) { 49 49 return value.replaceAll("\\s+", " "); 50 } 51 52 private String normalizeVariant(String key) { 53 return key.toLowerCase().replaceAll("-", " "); 50 54 } 51 55 … … 79 83 NodeList childNodeList = node.getChildNodes(); 80 84 for (int j = 0; j < childNodeList.getLength(); j++) { 81 String variation = childNodeList.item(j).getTextContent(); 85 String variation = normalizeVariant(childNodeList.item(j).getTextContent()); 82 86 result.put(variation, organisationName); 83 87 } -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDataProcessorTest.java
r5979 r6020 397 397 assertEquals("English", doc.getFieldValue("language")); 398 398 assertEquals("Netherlands", doc.getFieldValue("country")); 399 assertEquals("MPI for Psycholinguistics, MPG", doc.getFieldValue("organisation")); 399 assertEquals("Max Planck Institute for Psycholinguistics", doc.getFieldValue("organisation")); 400 400 assertEquals("demo", doc.getFieldValue("genre")); 401 401 assertEquals(
Note: See TracChangeset
for help on using the changeset viewer.