Changeset 5997
- Timestamp:
- 02/19/15 12:40:19 (9 years ago)
- Location:
- vlo/trunk
- Files:
-
- 6 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
vlo/trunk/vlo-commons/src/main/java/eu/clarin/cmdi/vlo/LanguageCodeUtils.java
r5994 r5997 15 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 16 */ 17 package eu.clarin.cmdi.vlo .importer;17 package eu.clarin.cmdi.vlo; 18 18 19 19 import eu.clarin.cmdi.vlo.CommonUtils; 20 import eu.clarin.cmdi.vlo.config.VloConfig; 20 21 import java.net.URL; 21 22 import java.util.HashMap; … … 33 34 34 35 /** 35 * Some helper methods for working with language codes, extracted from {@link LanguageCodePostProcessor} 36 * Some helper methods for working with language codes, extracted from 37 * {@link LanguageCodePostProcessor} 38 * 36 39 * @author Thomas Eckart 37 40 */ 38 41 public class LanguageCodeUtils { 42 39 43 private final static Logger LOG = LoggerFactory.getLogger(LanguageCodeUtils.class); 40 44 41 private staticMap<String, String> twoLetterCodesMap;42 private staticMap<String, String> threeLetterCodesMap;43 private staticMap<String, String> silToIso639Map;44 private staticMap<String, String> languageNameToIso639Map;45 private staticMap<String, String> iso639ToLanguageNameMap;46 private staticMap<String, String> iso639_2TToISO639_3Map;45 private Map<String, String> twoLetterCodesMap; 46 private Map<String, String> threeLetterCodesMap; 47 private Map<String, String> silToIso639Map; 48 private Map<String, String> languageNameToIso639Map; 49 private Map<String, String> iso639ToLanguageNameMap; 50 private Map<String, String> iso639_2TToISO639_3Map; 47 51 48 public static String getLanguageNameForLanguageCode(String langCode) { 52 private final VloConfig config; 53 54 public LanguageCodeUtils(VloConfig config) { 55 this.config = config; 56 } 57 58 public String getLanguageNameForLanguageCode(String langCode) { 49 59 String result = getIso639ToLanguageNameMap().get(langCode); 50 60 … … 56 66 } 57 67 58 public staticMap<String, String> getSilToIso639Map() {68 public Map<String, String> getSilToIso639Map() { 59 69 if (silToIso639Map == null) { 60 70 silToIso639Map = createSilToIsoCodeMap(); … … 63 73 } 64 74 65 public staticMap<String, String> getTwoLetterCountryCodeMap() {75 public Map<String, String> getTwoLetterCountryCodeMap() { 66 76 if (twoLetterCodesMap == null) { 67 twoLetterCodesMap = createCodeMap( MetadataImporter.config.getLanguage2LetterCodeComponentUrl());77 twoLetterCodesMap = createCodeMap(config.getLanguage2LetterCodeComponentUrl()); 68 78 } 69 79 return twoLetterCodesMap; 70 80 } 71 81 72 public staticMap<String, String> getThreeLetterCountryCodeMap() {82 public Map<String, String> getThreeLetterCountryCodeMap() { 73 83 if (threeLetterCodesMap == null) { 74 threeLetterCodesMap = createCodeMap( MetadataImporter.config.getLanguage3LetterCodeComponentUrl());84 threeLetterCodesMap = createCodeMap(config.getLanguage3LetterCodeComponentUrl()); 75 85 } 76 86 return threeLetterCodesMap; 77 87 } 78 88 79 public staticMap<String, String> getLanguageNameToIso639Map() {89 public Map<String, String> getLanguageNameToIso639Map() { 80 90 if (languageNameToIso639Map == null) { 81 languageNameToIso639Map = createReverseCodeMap( MetadataImporter.config.getLanguage3LetterCodeComponentUrl());91 languageNameToIso639Map = createReverseCodeMap(config.getLanguage3LetterCodeComponentUrl()); 82 92 } 83 93 return languageNameToIso639Map; 84 94 } 85 95 86 public staticMap<String, String> getIso639ToLanguageNameMap() {96 public Map<String, String> getIso639ToLanguageNameMap() { 87 97 if (iso639ToLanguageNameMap == null) { 88 iso639ToLanguageNameMap = createCodeMap( MetadataImporter.config.getLanguage3LetterCodeComponentUrl());98 iso639ToLanguageNameMap = createCodeMap(config.getLanguage3LetterCodeComponentUrl()); 89 99 } 90 100 … … 100 110 * @return map of ISO 639-2/B codes to ISO 639-3 101 111 */ 102 public staticMap<String, String> getIso6392TToISO6393Map() {112 public Map<String, String> getIso6392TToISO6393Map() { 103 113 if (iso639_2TToISO639_3Map == null) { 104 114 iso639_2TToISO639_3Map = new HashMap<String, String>(); … … 128 138 } 129 139 130 private staticMap<String, String> createCodeMap(String url) {140 private Map<String, String> createCodeMap(String url) { 131 141 LOG.debug("Creating language code map."); 132 142 try { … … 138 148 } 139 149 140 private staticMap<String, String> createReverseCodeMap(String url) {150 private Map<String, String> createReverseCodeMap(String url) { 141 151 LOG.debug("Creating language code map."); 142 152 try { … … 148 158 } 149 159 150 private staticMap<String, String> createSilToIsoCodeMap() {160 private Map<String, String> createSilToIsoCodeMap() { 151 161 LOG.debug("Creating silToIso code map."); 152 162 try { … … 154 164 DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); 155 165 domFactory.setNamespaceAware(true); 156 URL url = new URL( MetadataImporter.config.getSilToISO639CodesUrl());166 URL url = new URL(config.getSilToISO639CodesUrl()); 157 167 DocumentBuilder builder = domFactory.newDocumentBuilder(); 158 168 Document doc = builder.parse(url.openStream()); -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIParserVTDXML.java
r5985 r5997 28 28 29 29 private static final String DEFAULT_LANGUAGE = "und"; 30 30 31 31 public CMDIParserVTDXML(Map<String, PostProcessor> postProcessors, Boolean useLocalXSDCache) { 32 32 this.postProcessors = postProcessors; … … 244 244 languageCode = nav.toString(langAttrIndex).trim(); 245 245 // replace 2-letter with 3-letter codes 246 if( LanguageCodeUtils.getSilToIso639Map().containsKey(languageCode))247 languageCode = LanguageCodeUtils.getSilToIso639Map().get(languageCode);246 if(MetadataImporter.languageCodeUtils.getSilToIso639Map().containsKey(languageCode)) 247 languageCode = MetadataImporter.languageCodeUtils.getSilToIso639Map().get(languageCode); 248 248 249 249 List<String> valueList = postProcess(config.getName(), value); -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageCodePostProcessor.java
r5981 r5997 1 1 package eu.clarin.cmdi.vlo.importer; 2 2 3 import eu.clarin.cmdi.vlo.LanguageCodeUtils; 3 4 import org.slf4j.Logger; 4 5 import org.slf4j.LoggerFactory; … … 12 13 13 14 private final static Logger LOG = LoggerFactory.getLogger(LanguageCodePostProcessor.class); 14 15 15 16 protected static final String CODE_PREFIX = "code:"; 16 17 protected static final String LANG_NAME_PREFIX = "name:"; … … 40 41 41 42 protected String extractLanguageCode(String value) { 43 final LanguageCodeUtils languageCodeUtils = MetadataImporter.languageCodeUtils; 44 42 45 String result = value; 43 46 … … 45 48 46 49 // input is already ISO 639-3? 47 if( LanguageCodeUtils.getIso639ToLanguageNameMap().keySet().contains(result.toUpperCase()))50 if(languageCodeUtils.getIso639ToLanguageNameMap().keySet().contains(result.toUpperCase())) 48 51 return CODE_PREFIX + result.toLowerCase(); 49 52 50 53 // input is 2-letter code -> map to ISO 639-3 51 if( LanguageCodeUtils.getSilToIso639Map().containsKey(result.toLowerCase())) {52 return CODE_PREFIX + LanguageCodeUtils.getSilToIso639Map().get(result.toLowerCase());54 if(languageCodeUtils.getSilToIso639Map().containsKey(result.toLowerCase())) { 55 return CODE_PREFIX + languageCodeUtils.getSilToIso639Map().get(result.toLowerCase()); 53 56 } 54 57 55 if( LanguageCodeUtils.getLanguageNameToIso639Map().containsKey(result)) { // (english) language name?56 return CODE_PREFIX + LanguageCodeUtils.getLanguageNameToIso639Map().get(result);58 if(languageCodeUtils.getLanguageNameToIso639Map().containsKey(result)) { // (english) language name? 59 return CODE_PREFIX + languageCodeUtils.getLanguageNameToIso639Map().get(result); 57 60 } 58 61 59 62 // convert ISO 639-2/T codes to ISO 639-3 60 if ( LanguageCodeUtils.getIso6392TToISO6393Map().containsKey(result.toLowerCase())) {61 return CODE_PREFIX + LanguageCodeUtils.getIso6392TToISO6393Map().get(result.toLowerCase());63 if (languageCodeUtils.getIso6392TToISO6393Map().containsKey(result.toLowerCase())) { 64 return CODE_PREFIX + languageCodeUtils.getIso6392TToISO6393Map().get(result.toLowerCase()); 62 65 } 63 66 -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageLinkPostProcessor.java
r5848 r5997 5 5 6 6 public class LanguageLinkPostProcessor extends LanguageCodePostProcessor { 7 7 8 8 /** 9 * Returns the link to language information 10 * If no mapping is found the original value is returned. 9 * Returns the link to language information If no mapping is found the 10 * original value is returned. 11 * 11 12 * @param value extracted language information 12 13 * @return HTML link to the CLARIN language information page … … 14 15 @Override 15 16 public List<String> process(String value) { 16 17 String langCode = super.process(value).get(0); 17 18 String result = langCode; 18 19 if (langCode != null) { 19 if (langCode.startsWith(CODE_PREFIX)) {20 if (langCode.startsWith(CODE_PREFIX)) { 20 21 langCode = langCode.substring(CODE_PREFIX.length()); 21 result = "<a href=\""+ MetadataImporter.config.getLanguageLinkPrefix() + langCode+"\">"+LanguageCodeUtils.getLanguageNameForLanguageCode(langCode.toUpperCase())+"</a>";22 } else if (langCode.startsWith(LANG_NAME_PREFIX)) {22 result = "<a href=\"" + MetadataImporter.config.getLanguageLinkPrefix() + langCode + "\">" + MetadataImporter.languageCodeUtils.getLanguageNameForLanguageCode(langCode.toUpperCase()) + "</a>"; 23 } else if (langCode.startsWith(LANG_NAME_PREFIX)) { 23 24 result = langCode.substring(LANG_NAME_PREFIX.length()); 24 25 } -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageNamePostProcessor.java
r5848 r5997 1 1 package eu.clarin.cmdi.vlo.importer; 2 2 3 import eu.clarin.cmdi.vlo.LanguageCodeUtils; 3 4 import java.util.ArrayList; 4 5 import java.util.List; … … 21 22 @Override 22 23 public List<String> process(String value) { 24 final LanguageCodeUtils languageCodeUtils = MetadataImporter.languageCodeUtils; 25 23 26 String result = value; 24 27 if (value != null) { … … 28 31 29 32 if (langCode.length() == 2) { 30 twoLetterCodesMap = LanguageCodeUtils.getTwoLetterCountryCodeMap();33 twoLetterCodesMap = languageCodeUtils.getTwoLetterCountryCodeMap(); 31 34 String name = twoLetterCodesMap.get(langCode.toUpperCase()); 32 35 if (name != null) { … … 34 37 } 35 38 } else if (langCode.length() == 3) { 36 threeLetterCodesMap = LanguageCodeUtils.getThreeLetterCountryCodeMap();39 threeLetterCodesMap = languageCodeUtils.getThreeLetterCountryCodeMap(); 37 40 String name = threeLetterCodesMap.get(langCode.toUpperCase()); 38 41 if (name != null) { -
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java
r5979 r5997 1 1 package eu.clarin.cmdi.vlo.importer; 2 2 3 import eu.clarin.cmdi.vlo.LanguageCodeUtils; 3 4 import eu.clarin.cmdi.vlo.CommonUtils; 4 5 import eu.clarin.cmdi.vlo.FacetConstants; … … 439 440 public static VloConfig config; 440 441 442 public static LanguageCodeUtils languageCodeUtils; 443 441 444 /** 442 445 * @param args … … 511 514 final XmlVloConfigFactory configFactory = new XmlVloConfigFactory(configUrl); 512 515 MetadataImporter.config = configFactory.newConfig(); 516 MetadataImporter.languageCodeUtils = new LanguageCodeUtils(MetadataImporter.config); 513 517 514 518 // optionally, modify the configuration here -
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/ImporterTestcase.java
r5100 r5997 1 1 package eu.clarin.cmdi.vlo.importer; 2 2 3 import eu.clarin.cmdi.vlo.LanguageCodeUtils; 3 4 import eu.clarin.cmdi.vlo.config.DefaultVloConfigFactory; 4 5 import eu.clarin.cmdi.vlo.config.VloConfig; … … 39 40 MetadataImporter.config.setComponentRegistryRESTURL("http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/"); 40 41 config = MetadataImporter.config; 42 43 MetadataImporter.languageCodeUtils = new LanguageCodeUtils(config); 41 44 } 42 45
Note: See TracChangeset
for help on using the changeset viewer.