Changeset 5188


Ignore:
Timestamp:
05/12/14 15:08:42 (10 years ago)
Author:
teckart@informatik.uni-leipzig.de
Message:

Changed the content of the facet "languageCode". This facet will be renamed to "language" as soon as the current facets "language" and "languages" are no longer needed. The facet now contains either the ISO 639-3 code (prefixed with "code:") or, if the language code could not be identified, the language name (prefixed with "name:").

Location:
vlo/trunk/vlo-importer/src
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageCodePostProcessor.java

    r5143 r5188  
    2323    private final static Logger LOG = LoggerFactory.getLogger(LanguageCodePostProcessor.class);
    2424
     25    protected static final String CODE_PREFIX = "code:";
     26    protected static final String LANG_NAME_PREFIX = "name:";
    2527    protected static final String ISO639_3_PREFIX = "ISO639-3:";
    2628    protected static final String SIL_CODE_PREFIX = "RFC1766:x-sil-";
     
    5153        String result = value;
    5254       
     55        // input is already ISO 639-3?
     56        if(getIso639ToLanguageNameMap().keySet().contains(value.toUpperCase()))
     57            return CODE_PREFIX + value.toLowerCase();
     58       
    5359        // deal with prefixes or language names
    5460        if (value.length() != 2 && value.length() != 3) {
    5561            if (value.startsWith(ISO639_3_PREFIX)) {
    56                 return value.substring(ISO639_3_PREFIX.length()).toLowerCase();
     62                return CODE_PREFIX + value.substring(ISO639_3_PREFIX.length()).toLowerCase();
    5763            } else if (value.startsWith(SIL_CODE_PREFIX) || value.startsWith(SIL_CODE_PREFIX_alt)) {
    5864                result = value.substring(value.lastIndexOf("-")+1);
     
    6066                String isoCode = silToIso639Map.get(result.toLowerCase());
    6167                if (isoCode != null) {
    62                     result = isoCode;
     68                    result = CODE_PREFIX + isoCode;
    6369                }
    6470            } else if(getLanguageNameToIso639Map().containsKey(value)) { // (english) language name?
    65                 return getLanguageNameToIso639Map().get(value);
     71                return CODE_PREFIX + getLanguageNameToIso639Map().get(value);
    6672            }
    6773        }
     
    7177            if(silToIso639Map == null)
    7278                silToIso639Map = getSilToIso639Map();
    73             result = silToIso639Map.get(result.toLowerCase());
     79            result = CODE_PREFIX + silToIso639Map.get(result.toLowerCase());
    7480        }
    7581       
    7682        // convert ISO 639-2/T codes to ISO 639-3
    7783        if (getIso6392TToISO6393Map().containsKey(value.toLowerCase())) {
    78             result = getIso6392TToISO6393Map().get(value.toLowerCase());
    79         }
    80        
     84            result = CODE_PREFIX + getIso6392TToISO6393Map().get(value.toLowerCase());
     85        }
     86       
     87        // language code not identified? -> language name
     88        if(!result.startsWith(CODE_PREFIX) && !result.equals(""))
     89            result = LANG_NAME_PREFIX + result;
    8190        return result;
    8291    }
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageLinkPostProcessor.java

    r5143 r5188  
    1313        String result = langCode;
    1414        if (langCode != null) {
    15             if(langCode.length() == 3)
     15            if(langCode.startsWith(CODE_PREFIX)) {
     16                langCode = langCode.substring(CODE_PREFIX.length());
    1617                result = "<a href=\""+ MetadataImporter.config.getLanguageLinkPrefix() + langCode+"\">"+getLanguageNameForLanguageCode(langCode.toUpperCase())+"</a>";
     18            } else if(langCode.startsWith(LANG_NAME_PREFIX)) {
     19                result = langCode.substring(LANG_NAME_PREFIX.length());
     20            }
    1721        }
    1822        return result;
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/LanguageNamePostProcessor.java

    r5143 r5188  
    2222        if (value != null) {
    2323            String langCode = extractLanguageCode(value);
     24            if(langCode.startsWith(CODE_PREFIX))
     25                langCode = langCode.substring(CODE_PREFIX.length());
     26           
    2427            if (langCode.length() == 2) {
    2528                twoLetterCodesMap = getTwoLetterCountryCodeMap();
  • vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/LanguageCodePostProcessorTest.java

    r5143 r5188  
    2020    public void testLanguageCode() {
    2121        PostProcessor processor = new LanguageCodePostProcessor();
    22         assertEquals("nld", processor.process("NL"));
    23         assertEquals("eng", processor.process("en"));
    24         assertEquals("nld", processor.process("nl"));
    25         assertEquals("fry", processor.process("fry"));
    26         assertEquals("test", processor.process("test"));
     22        assertEquals("code:nld", processor.process("NL"));
     23        assertEquals("code:eng", processor.process("en"));
     24        assertEquals("code:nld", processor.process("nl"));
     25        assertEquals("code:fry", processor.process("fry"));
     26        assertEquals("name:test", processor.process("test"));
    2727        assertEquals("", processor.process(""));
    2828        assertEquals(null, processor.process(null));
    29         assertEquals("fra", processor.process("ISO639-3:fra"));
    30         assertEquals("deu", processor.process("RFC1766:x-sil-GER"));
    31         assertEquals("RFC1766:sgn-NL", processor.process("RFC1766:sgn-NL"));
    32         assertEquals("eus", processor.process("baq"));
    33         assertEquals("eng", processor.process("eng"));
    34         assertEquals("eng", processor.process("English"));
    35         assertEquals("deu", processor.process("German"));
    36         assertEquals("esn", processor.process("Salvadoran Sign Language"));
     29        assertEquals("code:fra", processor.process("ISO639-3:fra"));
     30        assertEquals("code:deu", processor.process("RFC1766:x-sil-GER"));
     31        assertEquals("name:RFC1766:sgn-NL", processor.process("RFC1766:sgn-NL"));
     32        assertEquals("code:eus", processor.process("baq"));
     33        assertEquals("code:eng", processor.process("eng"));
     34        assertEquals("code:eng", processor.process("English"));
     35        assertEquals("code:deu", processor.process("German"));
     36        assertEquals("code:esn", processor.process("Salvadoran Sign Language"));
    3737    }
    3838
Note: See TracChangeset for help on using the changeset viewer.