Changeset 6653
- Timestamp:
- 10/07/15 10:51:24 (9 years ago)
- Location:
- vlo/trunk/vlo-importer/src
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIParserVTDXML.java
--- r6387
+++ r6653
@@ -29,5 +29,5 @@
 private final static Logger LOG = LoggerFactory.getLogger(CMDIParserVTDXML.class);
 
-private static final String DEFAULT_LANGUAGE = "und";
+private static final String DEFAULT_LANGUAGE = "code:und";
 
 public CMDIParserVTDXML(Map<String, PostProcessor> postProcessors, Boolean useLocalXSDCache) {
@@ -285,5 +285,5 @@
 
 // ignore non-English language names for facet LANGUAGE_CODE
-if (config.getName().equals(FacetConstants.FIELD_LANGUAGE_CODE) && !languageCode.equals("en") && !languageCode.equals("eng") && !languageCode.equals("und")) {
+if (config.getName().equals(FacetConstants.FIELD_LANGUAGE_CODE) && !languageCode.equals("code:eng") && !languageCode.equals("code:und")) {
 index = ap.evalXPath();
 continue;
@@ -314,13 +314,12 @@
 // extract language code in xml:lang if available
 Integer langAttrIndex = nav.getAttrVal("xml:lang");
-String languageCode = DEFAULT_LANGUAGE;
+String languageCode;
 if (langAttrIndex != -1) {
 languageCode = nav.toString(langAttrIndex).trim();
-}
-// replace 2-letter with 3-letter codes
-if (MetadataImporter.languageCodeUtils.getSilToIso639Map().containsKey(languageCode)) {
-languageCode = MetadataImporter.languageCodeUtils.getSilToIso639Map().get(languageCode);
-}
-return languageCode;
+} else {
+return DEFAULT_LANGUAGE;
+}
+
+return postProcessors.get(FacetConstants.FIELD_LANGUAGE_CODE).process(languageCode).get(0);
 }
 
@@ -332,5 +331,5 @@
 String fieldValue = valueList.get(i).trim();
 if (name.equals(FacetConstants.FIELD_DESCRIPTION)) {
-fieldValue = "{lang='" + languageCode + "'}" + fieldValue;
+fieldValue = "{" + languageCode + "}" + fieldValue;
 }
 cmdiData.addDocField(name, fieldValue, caseInsensitive);
vlo/trunk/vlo-importer/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDataProcessorTest.java
--- r6208
+++ r6653
@@ -402,5 +402,5 @@
 assertEquals("demo", doc.getFieldValue("genre"));
 assertEquals(
-"{lang='eng'}This recording was made to generate a freely available test resource including speech and gestures. The annotations were created by Peter and Kita who is gesture researcher at the MPI for Psycholinguistics.",
+"{code:eng}This recording was made to generate a freely available test resource including speech and gestures. The annotations were created by Peter and Kita who is gesture researcher at the MPI for Psycholinguistics.",
 doc.getFieldValue("description"));
 assertEquals("2002-10-30", doc.getFieldValue("temporalCoverage"));
@@ -519,5 +519,5 @@
 assertEquals("Netherlands", doc.getFieldValue("country"));
 assertEquals("demo", doc.getFieldValue("genre"));
-assertEquals("{lang='und'}Test.", doc.getFieldValue("description"));
+assertEquals("{code:und}Test.", doc.getFieldValue("description"));
 assertEquals("Should be null not empty string", null, doc.getFieldValue("organisation"));
 assertEquals(null, doc.getFieldValue("language"));
@@ -597,9 +597,9 @@
 List<String> descriptions = new ArrayList(fieldValues);
 Collections.sort(descriptions);
-assertEquals("{lang='und'}Channel: Talking;\n Genre: Traditional Narrative / Story;\n Country: Panama;\n"
+assertEquals("{code:und}Channel: Talking;\n Genre: Traditional Narrative / Story;\n Country: Panama;\n"
 + " Place of Recording: Mulatuppu;\n Event: Community Gathering;\n"
 + " Institutional Affiliation: University of Texas at Austin;\n Participant Information: Political Leader;", descriptions.get(0).toString());
-assertEquals("{lang='und'}Test", descriptions.get(1).toString());
-assertEquals("{lang='und'}The one-eyed grandmother is one of many traditional Kuna stories performed "
+assertEquals("{code:und}Test", descriptions.get(1).toString());
+assertEquals("{code:und}The one-eyed grandmother is one of many traditional Kuna stories performed "
 + "in the Kuna gathering house. This story, performed here by Pedro Arias, combines "
 + "European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more "
@@ -878,5 +878,5 @@
 assertEquals(null, doc.getFieldValue("year"));
 assertEquals(null, doc.getFieldValue("genre"));
-assertEquals("{lang='eng'}written general; 95 mio words; TEI/SGML", doc.getFieldValue("description"));
+assertEquals("{code:eng}written general; 95 mio words; TEI/SGML", doc.getFieldValue("description"));
 assertEquals("Written Corpus", doc.getFieldValue(FacetConstants.FIELD_RESOURCE_CLASS));
 }
Note: See TracChangeset for help on using the changeset viewer.