Changeset 5143
- Timestamp:
- 05/07/14 12:20:11 (10 years ago)
- Location:
- vlo/trunk/vlo-importer/src
- Files:
-
- 7 edited
- 3 copied
Legend:
- Unmodified
- Added
- Removed
-
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageCodePostProcessor.java
r4575 r5143 35 35 36 36 /** 37 * Returns the language name based on the mapping defined in the CMDI components: See http://trac.clarin.eu/ticket/40 for the mapping.37 * Returns the language code based on the mapping defined in the CMDI components: See http://trac.clarin.eu/ticket/40 for the mapping. 38 38 * If no mapping is found the original value is returned. 39 * @param value extracted language value (language code or language name) from CMDI file 40 * @return ISO 639-3 code 39 41 */ 40 42 @Override 41 43 public String process(String value) { 42 String result = value; 43 if (value != null) { 44 String langCode = extractLanguageCode(value); 45 if (langCode.length() == 2) { 46 Map<String, String> twoLetterCodesMap = getTwoLetterCountryCodeMap(); 47 String name = twoLetterCodesMap.get(langCode.toUpperCase()); 48 if (name != null) { 49 result = name; 50 } 51 } else if (langCode.length() == 3) { 52 Map<String, String> threeLetterCodesMap = getThreeLetterCountryCodeMap(); 53 String name = threeLetterCodesMap.get(langCode.toUpperCase()); 54 if (name != null) { 55 result = name; 56 } 57 } 58 } 59 return result; 44 if (value != null) 45 return extractLanguageCode(value); 46 else 47 return null; 60 48 } 61 49 62 50 protected String extractLanguageCode(String value) { 63 51 String result = value; 52 53 // deal with prefixes or language names 64 54 if (value.length() != 2 && value.length() != 3) { 65 55 if (value.startsWith(ISO639_3_PREFIX)) { 66 re sult = value.substring(ISO639_3_PREFIX.length());56 return value.substring(ISO639_3_PREFIX.length()).toLowerCase(); 67 57 } else if (value.startsWith(SIL_CODE_PREFIX) || value.startsWith(SIL_CODE_PREFIX_alt)) { 68 58 result = value.substring(value.lastIndexOf("-")+1); 69 Map<String, String> silToISOMap = getSilToIso639Map();70 String isoCode = silToI SOMap.get(result.toUpperCase());59 silToIso639Map = getSilToIso639Map(); 60 String isoCode = silToIso639Map.get(result.toLowerCase()); 71 61 if (isoCode != null) { 72 62 result = 
isoCode; 73 63 } 64 } else if(getLanguageNameToIso639Map().containsKey(value)) { // (english) language name? 65 return getLanguageNameToIso639Map().get(value); 74 66 } 75 67 } 76 68 69 // map 2-letter codes to ISO 639-3 70 if(result.length() == 2) { 71 if(silToIso639Map == null) 72 silToIso639Map = getSilToIso639Map(); 73 result = silToIso639Map.get(result.toLowerCase()); 74 } 75 77 76 // convert ISO 639-2/T codes to ISO 639-3 78 if (getIso6392TToISO6393Map().containsKey(value.toUpperCase())) { 79 result = getIso6392TToISO6393Map().get(value.toUpperCase()); 80 } 81 82 // Convert to lowercase to capture erroneously capitalized language codes in the CMDI files. 83 // NOTE: In the mappings themselves we do not capitalize. 84 result = result.toLowerCase(); 77 if (getIso6392TToISO6393Map().containsKey(value.toLowerCase())) { 78 result = getIso6392TToISO6393Map().get(value.toLowerCase()); 79 } 80 85 81 return result; 86 82 } … … 94 90 return result; 95 91 } 96 92 97 93 protected Map<String, String> getSilToIso639Map() { 98 94 if (silToIso639Map == null) { … … 102 98 } 103 99 104 pr ivateMap<String, String> getTwoLetterCountryCodeMap() {100 protected Map<String, String> getTwoLetterCountryCodeMap() { 105 101 if (twoLetterCodesMap == null) { 106 102 twoLetterCodesMap = createCodeMap(MetadataImporter.config.getLanguage2LetterCodeComponentUrl()); … … 109 105 } 110 106 111 pr ivateMap<String, String> getThreeLetterCountryCodeMap() {107 protected Map<String, String> getThreeLetterCountryCodeMap() { 112 108 if (threeLetterCodesMap == null) { 113 109 threeLetterCodesMap = createCodeMap(MetadataImporter.config.getLanguage3LetterCodeComponentUrl()); … … 118 114 protected Map<String, String> getLanguageNameToIso639Map() { 119 115 if (languageNameToIso639Map == null) { 120 116 languageNameToIso639Map = createReverseCodeMap(MetadataImporter.config.getLanguage3LetterCodeComponentUrl()); 121 117 } 122 118 return languageNameToIso639Map; … … 130 126 return iso639ToLanguageNameMap; 131 
127 } 132 128 133 129 /** 134 130 * Returns map of ISO 639-2/B codes to ISO 639-3 … … 141 137 if (iso639_2TToISO639_3Map == null) { 142 138 iso639_2TToISO639_3Map = new HashMap<String, String>(); 143 iso639_2TToISO639_3Map.put(" ALB", "SQI");144 iso639_2TToISO639_3Map.put(" ARM", "HYE");145 iso639_2TToISO639_3Map.put(" BAQ", "EUS");146 iso639_2TToISO639_3Map.put(" BUR", "MYA");147 iso639_2TToISO639_3Map.put(" CZE", "CES");148 iso639_2TToISO639_3Map.put(" CHI", "ZHO");149 iso639_2TToISO639_3Map.put(" DUT", "NLD");150 iso639_2TToISO639_3Map.put(" FRE", "FRA");151 iso639_2TToISO639_3Map.put(" GEO", "KAT");152 iso639_2TToISO639_3Map.put(" GER", "DEU");153 iso639_2TToISO639_3Map.put(" GRE", "ELL");154 iso639_2TToISO639_3Map.put(" ICE", "ISL");155 iso639_2TToISO639_3Map.put(" MAC", "MKD");156 iso639_2TToISO639_3Map.put(" MAO", "MRI");157 iso639_2TToISO639_3Map.put(" MAY", "MSA");158 iso639_2TToISO639_3Map.put(" PER", "FAS");159 iso639_2TToISO639_3Map.put(" RUM", "RON");160 iso639_2TToISO639_3Map.put(" SLO", "SLK");161 iso639_2TToISO639_3Map.put(" TIB", "BOD");162 iso639_2TToISO639_3Map.put(" WEL", "CYM");139 iso639_2TToISO639_3Map.put("alb", "sqi"); 140 iso639_2TToISO639_3Map.put("arm", "hye"); 141 iso639_2TToISO639_3Map.put("baq", "eus"); 142 iso639_2TToISO639_3Map.put("bur", "mya"); 143 iso639_2TToISO639_3Map.put("cze", "ces"); 144 iso639_2TToISO639_3Map.put("chi", "zho"); 145 iso639_2TToISO639_3Map.put("dut", "nld"); 146 iso639_2TToISO639_3Map.put("fre", "fra"); 147 iso639_2TToISO639_3Map.put("geo", "kat"); 148 iso639_2TToISO639_3Map.put("ger", "deu"); 149 iso639_2TToISO639_3Map.put("gre", "ell"); 150 iso639_2TToISO639_3Map.put("ice", "isl"); 151 iso639_2TToISO639_3Map.put("max", "mkd"); 152 iso639_2TToISO639_3Map.put("mao", "mri"); 153 iso639_2TToISO639_3Map.put("may", "msa"); 154 iso639_2TToISO639_3Map.put("per", "fas"); 155 iso639_2TToISO639_3Map.put("rum", "ron"); 156 iso639_2TToISO639_3Map.put("slo", "slk"); 157 iso639_2TToISO639_3Map.put("tib", "bod"); 158 
iso639_2TToISO639_3Map.put("wel", "cym"); 163 159 } 164 160 … … 201 197 String silCode = node.getFirstChild().getTextContent(); 202 198 String isoCode = node.getLastChild().getTextContent(); 203 result.put(silCode .toUpperCase(), isoCode);199 result.put(silCode, isoCode); 204 200 } 205 201 return result; -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageLinkPostProcessor.java
r4507 r5143 1 1 package eu.clarin.cmdi.vlo.importer; 2 2 3 import java.util.Map;4 5 6 3 public class LanguageLinkPostProcessor extends LanguageCodePostProcessor { 7 private static LanguageCodePostProcessor languageCodePostProcessor = new LanguageCodePostProcessor();8 9 4 /** 10 5 * Returns the link to language information 11 6 * If no mapping is found the original value is returned. 7 * @param value extracted language information 8 * @return HTML link to the CLARIN language information page 12 9 */ 13 10 @Override 14 public String process(String value) { 15 String result = languageCodePostProcessor.process(value);16 if (value != null) {17 String langCode = extractISO639LanguageCode(value);11 public String process(String value) { 12 String langCode = super.process(value); 13 String result = langCode; 14 if (langCode != null) { 18 15 if(langCode.length() == 3) 19 16 result = "<a href=\""+ MetadataImporter.config.getLanguageLinkPrefix() + langCode+"\">"+getLanguageNameForLanguageCode(langCode.toUpperCase())+"</a>"; … … 21 18 return result; 22 19 } 23 24 /**25 * Try to guess the ISO 639-3 language code from value26 * @param value27 * @return ISO 639-3 code, or parameter value if it could not determined28 */29 protected String extractISO639LanguageCode(String value) {30 String result = value.toLowerCase();31 if (value.length() != 2 && value.length() != 3) {32 if (value.startsWith(ISO639_3_PREFIX)) {33 result = value.substring(ISO639_3_PREFIX.length());34 } else if (value.startsWith(SIL_CODE_PREFIX) || value.startsWith(SIL_CODE_PREFIX_alt)) {35 result = value.substring(value.lastIndexOf("-")+1);36 Map<String, String> silToISOMap = getSilToIso639Map();37 String isoCode = silToISOMap.get(result.toUpperCase());38 if (isoCode != null) {39 result = isoCode;40 }41 } else { // guessing based on language name42 if(getLanguageNameToIso639Map().containsKey(value))43 result = getLanguageNameToIso639Map().get(value).toLowerCase();44 }45 }46 47 // SIL code?48 if(result.length() == 2) 
{49 Map<String, String> silToISOMap = getSilToIso639Map();50 String isoCode = silToISOMap.get(result.toUpperCase());51 if (isoCode != null) {52 result = isoCode;53 }54 }55 56 return result;57 }58 20 } -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageNamePostProcessor.java
r5137 r5143 1 1 package eu.clarin.cmdi.vlo.importer; 2 2 3 import java.net.URL;4 import java.util.HashMap;5 3 import java.util.Map; 6 7 import javax.xml.parsers.DocumentBuilder;8 import javax.xml.parsers.DocumentBuilderFactory;9 import javax.xml.xpath.XPath;10 import javax.xml.xpath.XPathConstants;11 import javax.xml.xpath.XPathFactory;12 13 4 import org.slf4j.Logger; 14 5 import org.slf4j.LoggerFactory; 15 import org.w3c.dom.Document;16 import org.w3c.dom.Node;17 import org.w3c.dom.NodeList;18 6 19 import eu.clarin.cmdi.vlo.CommonUtils;20 7 21 public class Language CodePostProcessor implements PostProcessor{8 public class LanguageNamePostProcessor extends LanguageCodePostProcessor { 22 9 23 private final static Logger LOG = LoggerFactory.getLogger(LanguageCodePostProcessor.class); 24 25 protected static final String ISO639_3_PREFIX = "ISO639-3:"; 26 protected static final String SIL_CODE_PREFIX = "RFC1766:x-sil-"; 27 protected static final String SIL_CODE_PREFIX_alt = "RFC-1766:x-sil-"; 10 private final static Logger LOG = LoggerFactory.getLogger(LanguageNamePostProcessor.class); 28 11 29 12 private Map<String, String> twoLetterCodesMap; 30 13 private Map<String, String> threeLetterCodesMap; 31 private Map<String, String> silToIso639Map;32 private Map<String, String> languageNameToIso639Map;33 private Map<String, String> iso639ToLanguageNameMap;34 private Map<String, String> iso639_2TToISO639_3Map;35 14 36 15 /** … … 44 23 String langCode = extractLanguageCode(value); 45 24 if (langCode.length() == 2) { 46 Map<String, String>twoLetterCodesMap = getTwoLetterCountryCodeMap();25 twoLetterCodesMap = getTwoLetterCountryCodeMap(); 47 26 String name = twoLetterCodesMap.get(langCode.toUpperCase()); 48 27 if (name != null) { … … 50 29 } 51 30 } else if (langCode.length() == 3) { 52 Map<String, String>threeLetterCodesMap = getThreeLetterCountryCodeMap();31 threeLetterCodesMap = getThreeLetterCountryCodeMap(); 53 32 String name = 
threeLetterCodesMap.get(langCode.toUpperCase()); 54 33 if (name != null) { … … 59 38 return result; 60 39 } 61 62 protected String extractLanguageCode(String value) {63 String result = value;64 if (value.length() != 2 && value.length() != 3) {65 if (value.startsWith(ISO639_3_PREFIX)) {66 result = value.substring(ISO639_3_PREFIX.length());67 } else if (value.startsWith(SIL_CODE_PREFIX) || value.startsWith(SIL_CODE_PREFIX_alt)) {68 result = value.substring(value.lastIndexOf("-")+1);69 Map<String, String> silToISOMap = getSilToIso639Map();70 String isoCode = silToISOMap.get(result.toUpperCase());71 if (isoCode != null) {72 result = isoCode;73 }74 }75 }76 77 // convert ISO 639-2/T codes to ISO 639-378 if (getIso6392TToISO6393Map().containsKey(value.toUpperCase())) {79 result = getIso6392TToISO6393Map().get(value.toUpperCase());80 }81 82 // Convert to lowercase to capture erroneously capitalized language codes in the CMDI files.83 // NOTE: In the mappings themselves we do not capitalize.84 result = result.toLowerCase();85 return result;86 }87 88 public String getLanguageNameForLanguageCode(String langCode) {89 String result = getIso639ToLanguageNameMap().get(langCode);90 91 if(result == null)92 result = langCode;93 94 return result;95 }96 97 protected Map<String, String> getSilToIso639Map() {98 if (silToIso639Map == null) {99 silToIso639Map = createSilToIsoCodeMap();100 }101 return silToIso639Map;102 }103 104 private Map<String, String> getTwoLetterCountryCodeMap() {105 if (twoLetterCodesMap == null) {106 twoLetterCodesMap = createCodeMap(MetadataImporter.config.getLanguage2LetterCodeComponentUrl());107 }108 return twoLetterCodesMap;109 }110 111 private Map<String, String> getThreeLetterCountryCodeMap() {112 if (threeLetterCodesMap == null) {113 threeLetterCodesMap = createCodeMap(MetadataImporter.config.getLanguage3LetterCodeComponentUrl());114 }115 return threeLetterCodesMap;116 }117 118 protected Map<String, String> getLanguageNameToIso639Map() {119 if 
(languageNameToIso639Map == null) {120 languageNameToIso639Map = createReverseCodeMap(MetadataImporter.config.getLanguage3LetterCodeComponentUrl());121 }122 return languageNameToIso639Map;123 }124 125 private Map<String, String> getIso639ToLanguageNameMap() {126 if (iso639ToLanguageNameMap == null) {127 iso639ToLanguageNameMap = createCodeMap(MetadataImporter.config.getLanguage3LetterCodeComponentUrl());128 }129 130 return iso639ToLanguageNameMap;131 }132 133 /**134 * Returns map of ISO 639-2/B codes to ISO 639-3135 *136 * It is strongly advised to use ISO 639-3 codes, the support for ISO 639-2 may be discontinued in the future137 *138 * @return map of ISO 639-2/B codes to ISO 639-3139 */140 private Map<String, String> getIso6392TToISO6393Map() {141 if (iso639_2TToISO639_3Map == null) {142 iso639_2TToISO639_3Map = new HashMap<String, String>();143 iso639_2TToISO639_3Map.put("ALB", "SQI");144 iso639_2TToISO639_3Map.put("ARM", "HYE");145 iso639_2TToISO639_3Map.put("BAQ", "EUS");146 iso639_2TToISO639_3Map.put("BUR", "MYA");147 iso639_2TToISO639_3Map.put("CZE", "CES");148 iso639_2TToISO639_3Map.put("CHI", "ZHO");149 iso639_2TToISO639_3Map.put("DUT", "NLD");150 iso639_2TToISO639_3Map.put("FRE", "FRA");151 iso639_2TToISO639_3Map.put("GEO", "KAT");152 iso639_2TToISO639_3Map.put("GER", "DEU");153 iso639_2TToISO639_3Map.put("GRE", "ELL");154 iso639_2TToISO639_3Map.put("ICE", "ISL");155 iso639_2TToISO639_3Map.put("MAC", "MKD");156 iso639_2TToISO639_3Map.put("MAO", "MRI");157 iso639_2TToISO639_3Map.put("MAY", "MSA");158 iso639_2TToISO639_3Map.put("PER", "FAS");159 iso639_2TToISO639_3Map.put("RUM", "RON");160 iso639_2TToISO639_3Map.put("SLO", "SLK");161 iso639_2TToISO639_3Map.put("TIB", "BOD");162 iso639_2TToISO639_3Map.put("WEL", "CYM");163 }164 165 return iso639_2TToISO639_3Map;166 }167 168 private Map<String, String> createCodeMap(String url) {169 LOG.debug("Creating language code map.");170 try {171 Map<String, String> result = 
CommonUtils.createCMDIComponentItemMap(url);172 return result;173 } catch (Exception e) {174 throw new RuntimeException("Cannot instantiate postProcessor:", e);175 }176 }177 178 private Map<String, String> createReverseCodeMap(String url) {179 LOG.debug("Creating language code map.");180 try {181 Map<String, String> result = CommonUtils.createReverseCMDIComponentItemMap(url);182 return result;183 } catch (Exception e) {184 throw new RuntimeException("Cannot instantiate postProcessor:", e);185 }186 }187 188 private Map<String, String> createSilToIsoCodeMap() {189 LOG.debug("Creating silToIso code map.");190 try {191 Map<String, String> result = new HashMap<String, String>();192 DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();193 domFactory.setNamespaceAware(true);194 URL url = new URL(MetadataImporter.config.getSilToISO639CodesUrl());195 DocumentBuilder builder = domFactory.newDocumentBuilder();196 Document doc = builder.parse(url.openStream());197 XPath xpath = XPathFactory.newInstance().newXPath();198 NodeList nodeList = (NodeList) xpath.evaluate("//lang", doc, XPathConstants.NODESET);199 for (int i = 0; i < nodeList.getLength(); i++) {200 Node node = nodeList.item(i);201 String silCode = node.getFirstChild().getTextContent();202 String isoCode = node.getLastChild().getTextContent();203 result.put(silCode.toUpperCase(), isoCode);204 }205 return result;206 } catch (Exception e) {207 throw new RuntimeException("Cannot instantiate postProcessor:", e);208 }209 }210 211 40 } -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java
r4766 r5143 76 76 POST_PROCESSORS.put(FacetConstants.FIELD_CONTINENT, new ContinentNamePostProcessor()); 77 77 POST_PROCESSORS.put(FacetConstants.FIELD_COUNTRY, new CountryNamePostProcessor()); 78 POST_PROCESSORS.put(FacetConstants.FIELD_LANGUAGE, new LanguageCodePostProcessor()); 78 POST_PROCESSORS.put(FacetConstants.FIELD_LANGUAGE, new LanguageNamePostProcessor()); 79 79 POST_PROCESSORS.put(FacetConstants.FIELD_LANGUAGES, new LanguageLinkPostProcessor()); 80 POST_PROCESSORS.put(FacetConstants.FIELD_LANGUAGE_CODE, new LanguageCodePostProcessor()); 80 81 POST_PROCESSORS.put(FacetConstants.FIELD_YEAR, new YearPostProcessor()); 81 82 POST_PROCESSORS.put(FacetConstants.FIELD_NATIONAL_PROJECT, new NationalProjectPostProcessor()); -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDataProcessorTest.java
r5091 r5143 390 390 SolrInputDocument doc = data.getSolrDocument(); 391 391 assertNotNull(doc); 392 assertEquals(15, doc.getFieldNames().size()); 392 assertEquals(16, doc.getFieldNames().size()); 393 393 assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", doc.getFieldValue("_selfLink")); 394 394 assertEquals("kleve-route", doc.getFieldValue("name")); … … 593 593 SolrInputDocument doc = data.getSolrDocument(); 594 594 assertNotNull(doc); 595 assertEquals(10, doc.getFieldNames().size()); 595 assertEquals(11, doc.getFieldNames().size()); 596 596 assertEquals("oai:ailla.utexas.edu:1", doc.getFieldValue("_selfLink")); 597 597 assertEquals(null, doc.getFieldValue("name")); … … 902 902 SolrInputDocument doc = data.getSolrDocument(); 903 903 assertNotNull(doc); 904 assertEquals(10, doc.getFieldNames().size()); 904 assertEquals(11, doc.getFieldNames().size()); 905 905 assertEquals("clarin.eu:lrt:433", doc.getFieldValue("_selfLink")); 906 906 assertEquals("Corpus of Present-day Written Estonian", doc.getFieldValue("name")); -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/FacetMappingFactoryTest.java
r5091 r5143 35 35 36 36 List<FacetConfiguration> facets = facetMapping.getFacets(); 37 assertEquals(21, facets.size()); 37 assertEquals(22, facets.size()); 38 38 39 39 int index = 0; … … 101 101 assertEquals("/c:CMD/c:Components/c:Session/c:Resources/c:WrittenResource/c:LanguageId/text()", 102 102 mapping.getPatterns().get(1)); 103 mapping = facets.get(index++); 104 105 assertEquals(FacetConstants.FIELD_LANGUAGE_CODE, mapping.getName()); 103 106 mapping = facets.get(index++); 104 107 … … 182 185 183 186 List<FacetConfiguration> facets = facetMapping.getFacets(); 184 assertEquals(19, facets.size()); 187 assertEquals(20, facets.size()); 185 188 186 189 int index = 0; … … 238 241 mapping.getPatterns().get(0)); 239 242 mapping = facets.get(index++); 243 244 assertEquals(FacetConstants.FIELD_LANGUAGE_CODE, mapping.getName()); 245 mapping = facets.get(index++); 240 246 241 247 assertEquals(FacetConstants.FIELD_LANGUAGES, mapping.getName()); … … 308 314 309 315 List<FacetConfiguration> facets = facetMapping.getFacets(); 310 assertEquals(18, facets.size()); 316 assertEquals(19, facets.size()); 311 317 312 318 int index = 0; … … 371 377 "/c:CMD/c:Components/c:LrtInventoryResource/c:LrtCommon/c:Languages/c:ISO639/c:iso-639-3-code/text()", 372 378 mapping.getPatterns().get(0)); 379 mapping = facets.get(index++); 380 381 assertEquals(FacetConstants.FIELD_LANGUAGE_CODE, mapping.getName()); 373 382 mapping = facets.get(index++); 374 383 … … 469 478 470 479 471 facet = facets.get(14); 480 facet = facets.get(15); 472 481 assertEquals(FacetConstants.FIELD_DESCRIPTION, facet.getName()); 473 482 assertEquals(1, facet.getPatterns().size()); -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/LanguageCodePostProcessorTest.java
r5098 r5143 19 19 @Test 20 20 public void testLanguageCode() { 21 LanguageCodePostProcessor processor = new LanguageCodePostProcessor(); 22 assertEquals("Dutch", processor.process("NL")); 23 assertEquals("English", processor.process("en")); 24 assertEquals("Dutch", processor.process("nl")); 25 assertEquals("Western Frisian", processor.process("fry")); 21 PostProcessor processor = new LanguageCodePostProcessor(); 22 assertEquals("nld", processor.process("NL")); 23 assertEquals("eng", processor.process("en")); 24 assertEquals("nld", processor.process("nl")); 25 assertEquals("fry", processor.process("fry")); 26 26 assertEquals("test", processor.process("test")); 27 27 assertEquals("", processor.process("")); 28 28 assertEquals(null, processor.process(null)); 29 assertEquals("French", processor.process("ISO639-3:fra")); 30 assertEquals("German", processor.process("RFC1766:x-sil-GER")); 29 assertEquals("fra", processor.process("ISO639-3:fra")); 30 assertEquals("deu", processor.process("RFC1766:x-sil-GER")); 31 31 assertEquals("RFC1766:sgn-NL", processor.process("RFC1766:sgn-NL")); 32 assertEquals("Basque", processor.process("baq")); 32 assertEquals("eus", processor.process("baq")); 33 assertEquals("eng", processor.process("eng")); 34 assertEquals("eng", processor.process("English")); 35 assertEquals("deu", processor.process("German")); 36 assertEquals("esn", processor.process("Salvadoran Sign Language")); 33 37 } 34 38 -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/LanguageLinkPostProcessorTest.java
r5137 r5143 6 6 import org.junit.Test; 7 7 8 public class LanguageCodePostProcessorTest extends ImporterTestcase { 8 public class LanguageLinkPostProcessorTest extends ImporterTestcase { 9 9 10 10 @Before … … 18 18 19 19 @Test 20 public void testLanguageCode() { 21 LanguageCodePostProcessor processor = new LanguageCodePostProcessor(); 22 assertEquals("Dutch", processor.process("NL")); 23 assertEquals("English", processor.process("en")); 24 assertEquals("Dutch", processor.process("nl")); 25 assertEquals("Western Frisian", processor.process("fry")); 26 assertEquals("test", processor.process("test")); 27 assertEquals("", processor.process("")); 28 assertEquals(null, processor.process(null)); 29 assertEquals("French", processor.process("ISO639-3:fra")); 30 assertEquals("German", processor.process("RFC1766:x-sil-GER")); 31 assertEquals("RFC1766:sgn-NL", processor.process("RFC1766:sgn-NL")); 32 assertEquals("Basque", processor.process("baq")); 20 public void testLanguageLink() { 21 PostProcessor processor = new LanguageLinkPostProcessor(); 22 assertEquals("<a href=\"http://infra.clarin.eu/service/language/info.php?code=nld\">Dutch</a>", processor.process("nld")); 33 23 } 34 35 24 } -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/LanguageNamePostProcessorTest.java
r5137 r5143 6 6 import org.junit.Test; 7 7 8 public class LanguageCodePostProcessorTest extends ImporterTestcase { 8 public class LanguageNamePostProcessorTest extends ImporterTestcase { 9 9 10 10 @Before … … 19 19 @Test 20 20 public void testLanguageCode() { 21 LanguageCodePostProcessor processor = new LanguageCodePostProcessor(); 21 PostProcessor processor = new LanguageNamePostProcessor(); 22 22 assertEquals("Dutch", processor.process("NL")); 23 23 assertEquals("English", processor.process("en")); -
vlo/trunk/vlo-importer/src/test/resources/facetConceptsTest.xml
r5091 r5143 93 93 </facetConcept> 94 94 <facetConcept name="language" description="Language of the content of the resource"> 95 <concept>http://www.isocat.org/datcat/DC-2482</concept> <!-- language code --> 96 <concept>http://www.isocat.org/datcat/DC-2484</concept> <!-- language name --> 97 <concept>http://www.isocat.org/datcat/DC-5361</concept> <!-- language usage (TEI) --> 98 <concept>http://www.isocat.org/datcat/DC-5358</concept> <!--language (TEI) - MENZO: added for CLARIN-DK --> 99 100 <concept>http://purl.org/dc/terms/language</concept> 101 <!-- Had to be removed due to IDSAGD_Speaker <acceptableContext includeAny="false" includeEmpty="true"/> 102 <rejectableContext includeAny="true" includeEmpty="false"> 103 <concept>http://www.isocat.org/datcat/DC-4146</concept> 104 </rejectableContext> --> 105 <pattern>/c:CMD/c:Components//c:OLAC-DcmiTerms/c:language/@olac-language</pattern> 106 <pattern>/c:CMD/c:Components//c:OLAC-DcmiTerms/c:subject/@olac-language</pattern> 107 <pattern>/c:CMD/c:Components/c:LrtInventoryResource/c:LrtCommon/c:Languages/c:ISO639/c:iso-639-3-code/text()</pattern> 108 <pattern>/c:CMD/c:Components/c:mods/c:language/c:languageTerm/text()</pattern> 109 110 111 <!-- <concept>http://www.isocat.org/datcat/DC-2468</concept> --> <!-- BAS REMOVED because of component SubjectLanguages --> 112 113 <pattern>/c:CMD/c:Components//c:OLAC-DcmiTerms-ref/c:language/@olac-language</pattern> <!-- IDS OLAC-DcmiTerms-ref --> 114 <pattern>/c:CMD/c:Components//c:OLAC-DcmiTerms-ref-DWR/c:language/@olac-language</pattern> <!-- IDS OLAC-DcmiTerms-ref-DWR --> 115 <!-- <pattern>/c:CMD/c:Components/c:WebLichtWebService/c:Service/c:Operations/c:Operation/c:Input/c:ParameterGroup/c:Parameters/c:Parameter/c:Values/c:ParameterValue/c:Value/text()</pattern>--> <!-- WeblichtWebservices, breaks Tübingen Resources (oai_sfs_uni_tuebingen_de_FID_15.xml)--> 116 <blacklistPattern>/c:Actor_Languages/c:Actor_Language/</blacklistPattern> <!-- MPI (JK) --> 117 
<blacklistPattern>/c:ActorLanguage/c:Language/c:ISO639/c:iso-639-3-code</blacklistPattern> 118 <blacklistPattern>/c:LanguageName</blacklistPattern> <!-- general --> 119 <blacklistPattern>/c:CMD/c:Components/c:IDSAGD_Speaker/c:LinguisticBackground</blacklistPattern> <!-- IDS IDSAGD_Speaker --> 120 <blacklistPattern>/c:CMD/c:Components/c:IDSAGD_Corpus/c:Collection/c:DocumentationLanguages</blacklistPattern> <!-- IDS IDSAGD_Corpus --> 121 <blacklistPattern>/c:CMD/c:Components/c:TextCorpusProfile/c:Documentations</blacklistPattern> <!-- IMS TextCorpusProfile --> 122 <blacklistPattern>/c:CMD/c:Components/c:TextCorpusProfile/c:DocumentationLanguages</blacklistPattern> <!-- IMS TextCorpusProfile --> 123 <blacklistPattern>/c:CMD/c:Components/c:ToolProfile/c:Documentations</blacklistPattern> <!-- IMS ToolProfile --> 124 <blacklistPattern>/c:CMD/c:Components/c:ToolProfile/c:ToolContext/c:DocumentationLanguages</blacklistPattern> <!-- IMS ToolProfile --> 125 <blacklistPattern>/c:CMD/c:Components/c:LexicalResourceProfile/c:DocumentationLanguages</blacklistPattern> <!-- IMS c:LexicalResourceProfile --> 126 <blacklistPattern>/c:CMD/c:Components/c:MultimodalCorpus/c:DocumentationLanguages</blacklistPattern> <!-- IMS c:MultimodalCorpus --> 127 </facetConcept> 128 <facetConcept name="languageCode" description="Language of the content of the resource"> 95 129 <concept>http://www.isocat.org/datcat/DC-2482</concept> <!-- language code --> 96 130 <concept>http://www.isocat.org/datcat/DC-2484</concept> <!-- language name -->
Note: See TracChangeset
for help on using the changeset viewer.