Ignore:
Timestamp:
10/07/15 10:51:24 (9 years ago)
Author:
teckart@informatik.uni-leipzig.de
Message:

More consistent handling of xml:lang attributes for description values. The same scheme that is used for the languageCode facet is now also used to indicate the language of the content (#780)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIParserVTDXML.java

    r6387 r6653  
    29 29    private final static Logger LOG = LoggerFactory.getLogger(CMDIParserVTDXML.class);
    30 30
    31     private static final String DEFAULT_LANGUAGE = "und";
     31    private static final String DEFAULT_LANGUAGE = "code:und";
    32 32
    33 33    public CMDIParserVTDXML(Map<String, PostProcessor> postProcessors, Boolean useLocalXSDCache) {
     
    285 285
    286 286            // ignore non-English language names for facet LANGUAGE_CODE
    287             if (config.getName().equals(FacetConstants.FIELD_LANGUAGE_CODE) && !languageCode.equals("en") && !languageCode.equals("eng") && !languageCode.equals("und")) {
     287            if (config.getName().equals(FacetConstants.FIELD_LANGUAGE_CODE) && !languageCode.equals("code:eng") && !languageCode.equals("code:und")) {
    288 288                index = ap.evalXPath();
    289 289                continue;
     
    314 314        // extract language code in xml:lang if available
    315 315        Integer langAttrIndex = nav.getAttrVal("xml:lang");
    316         String languageCode = DEFAULT_LANGUAGE;
     316        String languageCode;
    317 317        if (langAttrIndex != -1) {
    318 318            languageCode = nav.toString(langAttrIndex).trim();
    319         }
    320         // replace 2-letter with 3-letter codes
    321         if (MetadataImporter.languageCodeUtils.getSilToIso639Map().containsKey(languageCode)) {
    322             languageCode = MetadataImporter.languageCodeUtils.getSilToIso639Map().get(languageCode);
    323         }
    324         return languageCode;
     319        } else {
     320            return DEFAULT_LANGUAGE;
     321        }
     322
     323        return postProcessors.get(FacetConstants.FIELD_LANGUAGE_CODE).process(languageCode).get(0);
    325 324    }
    326 325
     
    332 331            String fieldValue = valueList.get(i).trim();
    333 332            if (name.equals(FacetConstants.FIELD_DESCRIPTION)) {
    334                 fieldValue = "{lang='" + languageCode + "'}" + fieldValue;
     333                fieldValue = "{" + languageCode + "}" + fieldValue;
    335 334            }
    336 335            cmdiData.addDocField(name, fieldValue, caseInsensitive);
Note: See TracChangeset for help on using the changeset viewer.