source: vlo/trunk/vlo_importer/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDataProcessorTest.java @ 4072

Last change on this file since 4072 was 4072, checked in by keeloo, 10 years ago

Repaired CMDIDataProcessorTest - removed debug code

File size: 54.8 KB
Line 
1package eu.clarin.cmdi.vlo.importer;
2
3import eu.clarin.cmdi.vlo.FacetConstants;
4import eu.clarin.cmdi.vlo.config.VloConfig;
5import java.io.File;
6import java.util.ArrayList;
7import java.util.Collection;
8import java.util.Collections;
9import java.util.Iterator;
10import java.util.List;
11import org.apache.solr.common.SolrInputDocument;
12import static org.junit.Assert.assertEquals;
13import static org.junit.Assert.assertNotNull;
14import static org.junit.Assert.assertTrue;
15import org.junit.Before;
16import org.junit.Test;
17
18public class CMDIDataProcessorTest extends ImporterTestcase {
19
20    private CMDIDataProcessor getDataParser() {
21        return new CMDIParserVTDXML(MetadataImporter.POST_PROCESSORS);
22    }
23   
24    @Test
25    public void testCreateCMDIDataFromCorpus() throws Exception {
26       
27        // make sure the mapping file for testing is used
28        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
29
30        String content = "";
31        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
32        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n";
33        content += "   <Header>\n";
34        content += "      <MdCreationDate>2003-01-14</MdCreationDate>\n";
35        content += "      <MdSelfLink>test-hdl:1839/00-0000-0000-0000-0001-D</MdSelfLink>\n";
36        content += "      <MdProfile>clarin.eu:cr1:p_1274880881885</MdProfile>\n";
37        content += "   </Header>\n";
38        content += "   <Resources>\n";
39        content += "      <ResourceProxyList>\n";
40        content += "         <ResourceProxy id=\"d28635e19\">\n";
41        content += "            <ResourceType>Metadata</ResourceType>\n";
42        content += "            <ResourceRef>../acqui_data/Corpusstructure/acqui.imdi.cmdi</ResourceRef>\n";
43        content += "         </ResourceProxy>\n";
44        content += "         <ResourceProxy id=\"d28635e23\">\n";
45        content += "            <ResourceType>Metadata</ResourceType>\n";
46        content += "            <ResourceRef>../Comprehension/Corpusstructure/comprehension.imdi.cmdi</ResourceRef>\n";
47        content += "         </ResourceProxy>\n";
48        content += "         <ResourceProxy id=\"d28635e26\">\n";
49        content += "            <ResourceType>Metadata</ResourceType>\n";
50        content += "            <ResourceRef>../lac_data/Corpusstructure/lac.imdi.cmdi</ResourceRef>\n";
51        content += "         </ResourceProxy>\n";
52        content += "      </ResourceProxyList>\n";
53        content += "      <JournalFileProxyList/>\n";
54        content += "      <ResourceRelationList/>\n";
55        content += "   </Resources>\n";
56        content += "   <Components>\n";
57        content += "      <imdi-corpus>\n";
58        content += "         <Corpus>\n";
59        content += "            <Name>MPI corpora</Name>\n";
60        content += "            <Title>Corpora of the Max-Planck Institute for Psycholinguistics</Title>\n";
61        content += "            <CorpusLink Name=\"Acquisition\">../acqui_data/Corpusstructure/acqui.imdi</CorpusLink>\n";
62        content += "            <CorpusLink Name=\"Comprehension\">../Comprehension/Corpusstructure/comprehension.imdi</CorpusLink>\n";
63        content += "            <CorpusLink Name=\"Language and Cognition\">../lac_data/Corpusstructure/lac.imdi</CorpusLink>\n";
64        content += "            <descriptions>\n";
65        content += "               <Description LanguageId=\"\">IMDI corpora</Description>\n";
66        content += "               <Description LanguageId=\"\"/>\n";
67        content += "            </descriptions>\n";
68        content += "         </Corpus>\n";
69        content += "      </imdi-corpus>\n";
70        content += "   </Components>\n";
71        content += "</CMD>\n";
72        File cmdiFile = createCmdiFile("testCorpus", content);
73        CMDIDataProcessor processor = getDataParser();
74        CMDIData data = processor.process(cmdiFile);
75        assertEquals("test-hdl:1839/00-0000-0000-0000-0001-D", data.getId());
76        List<Resource> resources = data.getMetadataResources();
77        assertEquals(3, resources.size());
78        Resource res = resources.get(0);
79        assertEquals("../acqui_data/Corpusstructure/acqui.imdi.cmdi", res.getResourceName());
80        assertEquals(null, res.getMimeType());
81        assertEquals(0, data.getDataResources().size());
82        SolrInputDocument doc = data.getSolrDocument();
83        // TODO FIX bad test case. Depends on the presence of an internet connection! (BAD!)
84        assertTrue(doc.getFieldValues(FacetConstants.FIELD_CLARIN_PROFILE).contains("imdi-corpus"));
85        assertNotNull(doc);
86    }
87
88    @Test
89    public void testCreateCMDIDataFromSession() throws Exception {
90       
91        // make sure the mapping file for testing is used
92        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
93
94        String content = "";
95        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
96        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n";
97        content += "     xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n";
98        content += "   <Header>\n";
99        content += "      <MdCreationDate>2008-05-27</MdCreationDate>\n";
100        content += "      <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n";
101        content += "      <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n";
102        content += "   </Header>\n";
103        content += "   <Resources>\n";
104        content += "      <ResourceProxyList>\n";
105        content += "         <ResourceProxy id=\"d314e408\">\n";
106        content += "            <ResourceType mimetype=\"video/x-mpeg1\" >Resource</ResourceType>\n";
107        content += "            <ResourceRef>../Media/elan-example1.mpg</ResourceRef>\n";
108        content += "         </ResourceProxy>\n";
109        content += "         <ResourceProxy id=\"d314e471\">\n";
110        content += "            <ResourceType mimetype=\"audio/mpeg\" >Resource</ResourceType>\n";
111        content += "            <ResourceRef>../Media/elan-example1.mp3</ResourceRef>\n";
112        content += "         </ResourceProxy>\n";
113        content += "      </ResourceProxyList>\n";
114        content += "      <JournalFileProxyList/>\n";
115        content += "      <ResourceRelationList/>\n";
116        content += "   </Resources>\n";
117        content += "   <Components>\n";
118        content += "      <Session>\n";
119        content += "         <Name>kleve-route</Name>\n";
120        content += "         <Title>route description to Kleve</Title>\n";
121        content += "         <Date>2002-10-30</Date>\n";
122        content += "         <descriptions>\n";
123        content += "            <Description LanguageId=\"ISO639-2:eng\">This  recording was made to generate a freely available test resource including speech and gestures. The annotations were created by Peter and Kita who is gesture researcher at the MPI for Psycholinguistics.</Description>\n";
124        content += "            <Description LanguageId=\"ISO639-2:ger\">Diese Aufnahme wurde erzeugt, um eine frei verf\\u00fcgbare Test Resource zur Verf\\u00fcgung stellen zu k\\u00f6nnen, die Sprache und Gestik umfasst. Die Annotationen wurden von Peter und Kita, dem Gestik Researcher am MPI erzeugt.</Description>\n";
125        content += "         </descriptions>\n";
126        content += "         <MDGroup>\n";
127        content += "            <Location>\n";
128        content += "               <Continent>Europe</Continent>\n";
129        content += "               <Country>Netherlands</Country>\n";
130        content += "               <Region/>\n";
131        content += "               <Address>Wundtlaan 1, Nijmegen</Address>\n";
132        content += "            </Location>\n";
133        content += "            <Project>\n";
134        content += "               <Name>Peter Wittenburg</Name>\n";
135        content += "               <Title>Route description test resource</Title>\n";
136        content += "               <Id/>\n";
137        content += "               <Contact>\n";
138        content += "                  <Name>Peter Wittenburg</Name>\n";
139        content += "                  <Address>Wundtlaan 1, 6525 XD Nijmegen</Address>\n";
140        content += "                  <Email>peter.wittenburg@mpi.nl</Email>\n";
141        content += "                  <Organisation>Max Planck Institute for Psycholinguistics</Organisation>\n";
142        content += "               </Contact>\n";
143        content += "               <descriptions>\n";
144        content += "                  <Description LanguageId=\"\"/>\n";
145        content += "               </descriptions>\n";
146        content += "            </Project>\n";
147        content += "            <Keys>\n";
148        content += "               <Key Name=\"conversion.IMDI.1.9to3.0.warning\">Unknown mapping of Genre: conversation|explanation|unspecified --&gt; ???</Key>\n";
149        content += "            </Keys>\n";
150        content += "            <Content>\n";
151        content += "               <Genre>Demo</Genre>\n";
152        content += "               <SubGenre>Unspecified</SubGenre>\n";
153        content += "               <Task>route description</Task>\n";
154        content += "               <Modalities>Speech; Gestures</Modalities>\n";
155        content += "               <CommunicationContext>\n";
156        content += "                  <Interactivity>interactive</Interactivity>\n";
157        content += "                  <PlanningType>semi-spontaneous</PlanningType>\n";
158        content += "                  <Involvement>elicited</Involvement>\n";
159        content += "                  <SocialContext>Unspecified</SocialContext>\n";
160        content += "                  <EventStructure>Unspecified</EventStructure>\n";
161        content += "                  <Channel>Unspecified</Channel>\n";
162        content += "               </CommunicationContext>\n";
163        content += "               <Content_Languages>\n";
164        content += "                  <descriptions>\n";
165        content += "                     <Description LanguageId=\"\"/>\n";
166        content += "                  </descriptions>\n";
167        content += "                  <Content_Language>\n";
168        content += "                     <Id>ISO639-3:eng</Id>\n";
169        content += "                     <Name>English</Name>\n";
170        content += "                     <descriptions>\n";
171        content += "                        <Description LanguageId=\"\"/>\n";
172        content += "                     </descriptions>\n";
173        content += "                  </Content_Language>\n";
174        content += "               </Content_Languages>\n";
175        content += "               <Keys>\n";
176        content += "                  <Key Name=\"IMDI__1_9.Interactional\">conversation</Key>\n";
177        content += "                  <Key Name=\"IMDI__1_9.Discursive\">explanation</Key>\n";
178        content += "                  <Key Name=\"IMDI__1_9.Interactional\">Unspecified</Key>\n";
179        content += "               </Keys>\n";
180        content += "               <descriptions>\n";
181        content += "                  <Description LanguageId=\"ISO639:eng\">This file was generated from an IMDI 1.9 file and transformed to IMDI 3.0. The substructure of Genre is replaced by two elements named \"Genre\" and \"SubGenre\". The original content of Genre substructure was: Interactional = 'conversation', Discursive = 'explanation', Performance = 'Unspecified'. These values have been added as Keys to the Content information.</Description>\n";
182        content += "                  <Description LanguageId=\"ISO639:eng\">Peter explains how to come from Nijmegen to Kleve by car, such that Kita would be able to get there.</Description>\n";
183        content += "               </descriptions>\n";
184        content += "            </Content>\n";
185        content += "            <Actors>\n";
186        content += "               <descriptions>\n";
187        content += "                  <Description LanguageId=\"\"/>\n";
188        content += "               </descriptions>\n";
189        content += "               <Actor>\n";
190        content += "                  <Role>interviewee</Role>\n";
191        content += "                  <Name>Peter</Name>\n";
192        content += "                  <FullName>Peter Wittenburg</FullName>\n";
193        content += "                  <Code>W</Code>\n";
194        content += "                  <FamilySocialRole>Unspecified</FamilySocialRole>\n";
195        content += "                  <EthnicGroup/>\n";
196        content += "                  <Age>Unknown</Age>\n";
197        content += "                  <BirthDate>Unspecified</BirthDate>\n";
198        content += "                  <Sex>Unknown</Sex>\n";
199        content += "                  <Education>university</Education>\n";
200        content += "                  <Anonymized>true</Anonymized>\n";
201        content += "                  <Contact>\n";
202        content += "                     <Name/>\n";
203        content += "                     <Address/>\n";
204        content += "                     <Email/>\n";
205        content += "                     <Organisation/>\n";
206        content += "                  </Contact>\n";
207        content += "                  <Keys/>\n";
208        content += "                  <descriptions>\n";
209        content += "                     <Description LanguageId=\"\"/>\n";
210        content += "                  </descriptions>\n";
211        content += "                  <Actor_Languages>\n";
212        content += "                     <descriptions>\n";
213        content += "                        <Description LanguageId=\"\"/>\n";
214        content += "                     </descriptions>\n";
215        content += "                     <Actor_Language>\n";
216        content += "                        <Id>ISO639-3:nld</Id>\n";
217        content += "                        <Name>Dutch</Name>\n";
218        content += "                        <descriptions>\n";
219        content += "                           <Description LanguageId=\"\"/>\n";
220        content += "                        </descriptions>\n";
221        content += "                     </Actor_Language>\n";
222        content += "                     <Actor_Language>\n";
223        content += "                        <Id>ISO639-3:deu</Id>\n";
224        content += "                        <Name>German</Name>\n";
225        content += "                        <descriptions>\n";
226        content += "                           <Description LanguageId=\"\"/>\n";
227        content += "                        </descriptions>\n";
228        content += "                     </Actor_Language>\n";
229        content += "                     <Actor_Language>\n";
230        content += "                        <Id>ISO639-3:eng</Id>\n";
231        content += "                        <Name>English</Name>\n";
232        content += "                        <descriptions>\n";
233        content += "                           <Description LanguageId=\"\"/>\n";
234        content += "                        </descriptions>\n";
235        content += "                     </Actor_Language>\n";
236        content += "                  </Actor_Languages>\n";
237        content += "               </Actor>\n";
238        content += "               <Actor>\n";
239        content += "                  <Role>interviewer</Role>\n";
240        content += "                  <Name>Kita</Name>\n";
241        content += "                  <FullName>Sotaro Kita</FullName>\n";
242        content += "                  <Code>k</Code>\n";
243        content += "                  <FamilySocialRole>Unspecified</FamilySocialRole>\n";
244        content += "                  <EthnicGroup/>\n";
245        content += "                  <Age>Unknown</Age>\n";
246        content += "                  <BirthDate>Unspecified</BirthDate>\n";
247        content += "                  <Sex>Unknown</Sex>\n";
248        content += "                  <Education>university</Education>\n";
249        content += "                  <Anonymized>true</Anonymized>\n";
250        content += "                  <Contact>\n";
251        content += "                     <Name/>\n";
252        content += "                     <Address/>\n";
253        content += "                     <Email/>\n";
254        content += "                     <Organisation/>\n";
255        content += "                  </Contact>\n";
256        content += "                  <Keys/>\n";
257        content += "                  <descriptions>\n";
258        content += "                     <Description LanguageId=\"\"/>\n";
259        content += "                  </descriptions>\n";
260        content += "                  <Actor_Languages>\n";
261        content += "                     <descriptions>\n";
262        content += "                        <Description LanguageId=\"\"/>\n";
263        content += "                     </descriptions>\n";
264        content += "                     <Actor_Language>\n";
265        content += "                        <Id>ISO639-3:eng</Id>\n";
266        content += "                        <Name>English</Name>\n";
267        content += "                        <descriptions>\n";
268        content += "                           <Description LanguageId=\"\"/>\n";
269        content += "                        </descriptions>\n";
270        content += "                     </Actor_Language>\n";
271        content += "                     <Actor_Language>\n";
272        content += "                        <Id>ISO639-3:jpn</Id>\n";
273        content += "                        <Name>Japanese</Name>\n";
274        content += "                        <descriptions>\n";
275        content += "                           <Description LanguageId=\"\"/>\n";
276        content += "                        </descriptions>\n";
277        content += "                     </Actor_Language>\n";
278        content += "                  </Actor_Languages>\n";
279        content += "               </Actor>\n";
280        content += "               <Actor>\n";
281        content += "                  <Role>Collector</Role>\n";
282        content += "                  <Name>Peter Wittenburg</Name>\n";
283        content += "                  <FullName>Peter Wittenburg</FullName>\n";
284        content += "                  <Code>Unspecified</Code>\n";
285        content += "                  <FamilySocialRole>Unspecified</FamilySocialRole>\n";
286        content += "                  <EthnicGroup/>\n";
287        content += "                  <Age>Unspecified</Age>\n";
288        content += "                  <BirthDate>Unspecified</BirthDate>\n";
289        content += "                  <Sex>Unspecified</Sex>\n";
290        content += "                  <Education/>\n";
291        content += "                  <Anonymized>false</Anonymized>\n";
292        content += "                  <Contact>\n";
293        content += "                     <Name>Peter Wittenburg</Name>\n";
294        content += "                     <Address>Wundtlaan 1, 6525 XD Nijmegen</Address>\n";
295        content += "                     <Email>peter.wittenburg@mpi.nl</Email>\n";
296        content += "                     <Organisation>Max-Planck-Institute for Psycholinguistics</Organisation>\n";
297        content += "                  </Contact>\n";
298        content += "                  <Keys/>\n";
299        content += "                  <descriptions>\n";
300        content += "                     <Description LanguageId=\"\"/>\n";
301        content += "                  </descriptions>\n";
302        content += "                  <Actor_Languages/>\n";
303        content += "               </Actor>\n";
304        content += "            </Actors>\n";
305        content += "         </MDGroup>\n";
306        content += "         <Resources>\n";
307        content += "            <MediaFile ref=\"d314e408\">\n";
308        content += "               <ResourceLink>../Media/elan-example1.mpg</ResourceLink>\n";
309        content += "               <Type>video</Type>\n";
310        content += "               <Format>video/x-mpeg1</Format>\n";
311        content += "               <Size/>\n";
312        content += "               <Quality>Unknown</Quality>\n";
313        content += "               <RecordingConditions>excellent</RecordingConditions>\n";
314        content += "               <TimePosition>\n";
315        content += "                  <Start>Unknown</Start>\n";
316        content += "                  <End>Unknown</End>\n";
317        content += "               </TimePosition>\n";
318        content += "               <Access>\n";
319        content += "                  <Availability>openly available</Availability>\n";
320        content += "                  <Date>2003-02-12</Date>\n";
321        content += "                  <Owner>MPI for Psycholinguistics</Owner>\n";
322        content += "                  <Publisher/>\n";
323        content += "                  <Contact>\n";
324        content += "                     <Name>Romuald Skiba</Name>\n";
325        content += "                     <Address/>\n";
326        content += "                     <Email/>\n";
327        content += "                     <Organisation/>\n";
328        content += "                  </Contact>\n";
329        content += "                  <descriptions>\n";
330        content += "                     <Description LanguageId=\"\"/>\n";
331        content += "                  </descriptions>\n";
332        content += "               </Access>\n";
333        content += "               <descriptions>\n";
334        content += "                  <Description LanguageId=\"\"/>\n";
335        content += "               </descriptions>\n";
336        content += "               <Keys/>\n";
337        content += "            </MediaFile>\n";
338        content += "            <MediaFile ref=\"d314e471\">\n";
339        content += "               <ResourceLink>../Media/elan-example1.mp4</ResourceLink>\n";
340        content += "               <Type>video</Type>\n";
341        content += "               <Format>video/mp4</Format>\n";
342        content += "               <Size/>\n";
343        content += "               <Quality>Unknown</Quality>\n";
344        content += "               <RecordingConditions>excellent</RecordingConditions>\n";
345        content += "               <TimePosition>\n";
346        content += "                  <Start>Unknown</Start>\n";
347        content += "                  <End>Unknown</End>\n";
348        content += "               </TimePosition>\n";
349        content += "               <Access>\n";
350        content += "                  <Availability>openly available</Availability>\n";
351        content += "                  <Date>2003-02-12</Date>\n";
352        content += "                  <Owner>MPI for Psycholinguistics</Owner>\n";
353        content += "                  <Publisher/>\n";
354        content += "                  <Contact>\n";
355        content += "                     <Name>Romuald Skiba</Name>\n";
356        content += "                     <Address/>\n";
357        content += "                     <Email/>\n";
358        content += "                     <Organisation/>\n";
359        content += "                  </Contact>\n";
360        content += "                  <descriptions>\n";
361        content += "                     <Description LanguageId=\"\"/>\n";
362        content += "                  </descriptions>\n";
363        content += "               </Access>\n";
364        content += "               <descriptions>\n";
365        content += "                  <Description LanguageId=\"\"/>\n";
366        content += "               </descriptions>\n";
367        content += "               <Keys/>\n";
368        content += "            </MediaFile>\n";
369        content += "         </Resources>\n";
370        content += "         <References>\n";
371        content += "            <descriptions>\n";
372        content += "               <Description LanguageId=\"\"/>\n";
373        content += "            </descriptions>\n";
374        content += "         </References>\n";
375        content += "      </Session>\n";
376        content += "   </Components>\n";
377        content += "</CMD>\n";
378        File cmdiFile = createCmdiFile("testSession", content);
379        CMDIDataProcessor processor = getDataParser();
380        CMDIData data = processor.process(cmdiFile);
381        assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId());
382        List<Resource> resources = data.getMetadataResources();
383        assertEquals(0, resources.size());
384        List<Resource> dataResources = data.getDataResources();
385        assertEquals(2, dataResources.size());
386        Resource res = dataResources.get(0);
387        assertEquals("../Media/elan-example1.mpg", res.getResourceName());
388        assertEquals("video/x-mpeg1", res.getMimeType());
389        res = dataResources.get(1);
390        assertEquals("../Media/elan-example1.mp3", res.getResourceName());
391        assertEquals("audio/mpeg", res.getMimeType());
392        SolrInputDocument doc = data.getSolrDocument();
393        assertNotNull(doc);
394        assertEquals(14, doc.getFieldNames().size());
395        assertEquals("kleve-route", doc.getFieldValue("name"));
396        assertEquals("Peter Wittenburg", doc.getFieldValue(FacetConstants.FIELD_PROJECT_NAME));
397        assertEquals("Europe", doc.getFieldValue("continent"));
398        assertEquals("English", doc.getFieldValue("language"));
399        assertEquals("Netherlands", doc.getFieldValue("country"));
400        assertEquals("Max Planck Institute for Psycholinguistics", doc.getFieldValue("organisation"));
401        assertEquals("demo", doc.getFieldValue("genre"));
402        assertEquals(
403                "This  recording was made to generate a freely available test resource including speech and gestures. The annotations were created by Peter and Kita who is gesture researcher at the MPI for Psycholinguistics.",
404                doc.getFieldValue("description"));
405        assertEquals("2002", doc.getFieldValue("year"));
406        List<String> fieldValues = new ArrayList(doc.getFieldValues(FacetConstants.FIELD_FORMAT));
407        assertEquals(2, fieldValues.size());
408        assertEquals("video/x-mpeg1", fieldValues.get(0));
409        assertEquals("video/mp4", fieldValues.get(1));
410        assertEquals(null, doc.getFieldValue("subject"));
411    }
412
413    @Test
414    public void testCreateCMDISessionSmall() throws Exception {
415       
416        // make sure the mapping file for testing is used
417        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
418
419        String content = "";
420        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
421        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n";
422        content += "     xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n";
423        content += "   <Header>\n";
424        content += "      <MdCreationDate>2008-05-27</MdCreationDate>\n";
425        content += "      <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n";
426        content += "      <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n";
427        content += "   </Header>\n";
428        content += "   <Resources>\n";
429        content += "    </Resources>\n";
430        content += "   <Components>\n";
431        content += "      <Session>\n";
432        content += "         <Name>kleve-route</Name>\n";
433        content += "      </Session>\n";
434        content += "   </Components>\n";
435        content += "</CMD>\n";
436        File cmdiFile = createCmdiFile("testSession", content);
437        CMDIDataProcessor processor = getDataParser();
438        CMDIData data = processor.process(cmdiFile);
439        assertEquals("kleve-route", data.getSolrDocument().getFieldValue(FacetConstants.FIELD_NAME));
440    }
441
442    @Test
443    public void testEmptyFieldsShouldBeNull() throws Exception {
444       
445        // make sure the mapping file for testing is used
446        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
447
448        String content = "";
449        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
450        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n";
451        content += "     xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n";
452        content += "   <Header>\n";
453        content += "      <MdCreationDate>2008-05-27</MdCreationDate>\n";
454        content += "      <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n";
455        content += "      <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n";
456        content += "   </Header>\n";
457        content += "   <Resources>\n";
458        content += "      <ResourceProxyList>\n";
459        content += "      </ResourceProxyList>\n";
460        content += "      <JournalFileProxyList/>\n";
461        content += "      <ResourceRelationList/>\n";
462        content += "   </Resources>\n";
463        content += "   <Components>\n";
464        content += "      <Session>\n";
465        content += "         <Name>kleve-route</Name>\n";
466        content += "         <Title>route description to Kleve</Title>\n";
467        content += "         <Date></Date>\n";
468        content += "         <descriptions>\n";
469        content += "            <Description LanguageId=\"ISO639-2:eng\">Test.</Description>\n";
470        content += "         </descriptions>\n";
471        content += "         <MDGroup>\n";
472        content += "            <Location>\n";
473        content += "               <Continent>Europe</Continent>\n";
474        content += "               <Country>Netherlands</Country>\n";
475        content += "               <Region/>\n";
476        content += "               <Address>Wundtlaan 1, Nijmegen</Address>\n";
477        content += "            </Location>\n";
478        content += "            <Project>\n";
479        content += "               <Name></Name>\n";
480        content += "               <Title></Title>\n";
481        content += "               <Id/>\n";
482        content += "               <Contact>\n";
483        content += "                  <Name></Name>\n";
484        content += "                  <Address></Address>\n";
485        content += "                  <Email></Email>\n";
486        content += "                  <Organisation></Organisation>\n";
487        content += "               </Contact>\n";
488        content += "               <descriptions>\n";
489        content += "                  <Description LanguageId=\"\"/>\n";
490        content += "               </descriptions>\n";
491        content += "            </Project>\n";
492        content += "            <Keys>\n";
493        content += "            </Keys>\n";
494        content += "            <Content>\n";
495        content += "               <Genre>Demo</Genre>\n";
496        content += "               <SubGenre>Unspecified</SubGenre>\n";
497        content += "               <Task>route description</Task>\n";
498        content += "               <Modalities>Speech; Gestures</Modalities>\n";
499        content += "               <CommunicationContext>\n";
500        content += "               </CommunicationContext>\n";
501        content += "               <Content_Languages>\n";
502        content += "               </Content_Languages>\n";
503        content += "               <descriptions>\n";
504        content += "               </descriptions>\n";
505        content += "            </Content>\n";
506        content += "            <Actors>\n";
507        content += "            </Actors>\n";
508        content += "         </MDGroup>\n";
509        content += "         <Resources>\n";
510        content += "         </Resources>\n";
511        content += "      </Session>\n";
512        content += "   </Components>\n";
513        content += "</CMD>\n";
514        File cmdiFile = createCmdiFile("testSession", content);
515        CMDIDataProcessor processor = getDataParser();
516        CMDIData data = processor.process(cmdiFile);
517        assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId());
518        List<Resource> resources = data.getMetadataResources();
519        assertEquals(0, resources.size());
520        SolrInputDocument doc = data.getSolrDocument();
521        assertNotNull(doc);
522        assertEquals(8, doc.getFieldNames().size());
523        assertEquals("kleve-route", doc.getFieldValue("name"));
524        assertEquals("Europe", doc.getFieldValue("continent"));
525        assertEquals("Netherlands", doc.getFieldValue("country"));
526        assertEquals("demo", doc.getFieldValue("genre"));
527        assertEquals("Test.", doc.getFieldValue("description"));
528        assertEquals("Should be null not empty string", null, doc.getFieldValue("organisation"));
529        assertEquals(null, doc.getFieldValue("language"));
530        assertEquals(null, doc.getFieldValue("subject"));
531        assertEquals(null, doc.getFieldValue("year"));
532    }
533
534    @Test
535    public void testOlac() throws Exception {
536
537        // make sure the mapping file for testing is used
538        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
539
540        String content = "";
541        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
542        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n";
543        content += "     xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n";
544        content += "     xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\"\n";
545        content += "     xmlns:defns=\"http://www.openarchives.org/OAI/2.0/\"\n";
546        content += "     xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1288172614026/xsd\">\n";
547        content += "   <Header>\n";
548        content += "      <MdCreator>olac2cmdi.xsl</MdCreator>\n";
549        content += "      <MdCreationDate>2002-12-14</MdCreationDate>\n";
550        content += "      <MdSelfLink>oai:ailla.utexas.edu:1</MdSelfLink>\n";
551        content += "      <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n";
552        content += "   </Header>\n";
553        content += "   <Resources>\n";
554        content += "      <ResourceProxyList/>\n";
555        content += "      <JournalFileProxyList/>\n";
556        content += "      <ResourceRelationList/>\n";
557        content += "   </Resources>\n";
558        content += "   <Components>\n";
559        content += "      <OLAC-DcmiTerms>\n";
560        content += "         <creator>Joel Sherzer (recorder)</creator>\n";
561        content += "         <description>\n";
562        content += "    Channel: Talking;\n";
563        content += "    Genre: Traditional Narrative / Story;\n";
564        content += "    Country: Panama;\n";
565        content += "    Place of Recording: Mulatuppu;\n";
566        content += "    Event: Community Gathering;\n";
567        content += "    Institutional Affiliation: University of Texas at Austin;\n";
568        content += "    Participant Information: Political Leader;\n";
569        content += "      </description>\n";
570        content += "         <description>The one-eyed grandmother is one of many traditional Kuna stories performed in the Kuna gathering house. This story, performed here by Pedro Arias, combines European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more Kuna in origin. All are woven together and a moral is provided. Pedro Arias performed this story before a gathered audience in the morning..\n";
571        content += "      </description>\n";
572        content += "         <description>Test</description>\n";
573        content += "         <identifier>http://uts.cc.utexas.edu/~ailla/audio/sherzer/one_eyed_grandmother.ram</identifier>\n";
574        content += "         <identifier>http://uts.cc.utexas.edu/~ailla/texts/sherzer/one_eyed_grandmother.pdf</identifier>\n";
575        content += "         <language olac-language=\"x-sil-CHN\"/>\n";
576        content += "         <language>Chinese</language>\n";
577        content += "         <subject olac-linguistic-field=\"testSubject\">Kuna</subject>\n";
578        content += "         <type olac-linguistic-type=\"Transcription\"/>\n";
579        content += "         <format>WAV</format>\n";
580        content += "        <type dcterms-type=\"DCMIType\">Sound</type>\n";
581        content += "      </OLAC-DcmiTerms>\n";
582        content += "   </Components>\n";
583        content += "</CMD>\n";
584
585        File cmdiFile = createCmdiFile("testOlac", content);
586        CMDIDataProcessor processor = getDataParser();
587        CMDIData data = processor.process(cmdiFile);
588        assertEquals("oai:ailla.utexas.edu:1", data.getId());
589        List<Resource> resources = data.getMetadataResources();
590        assertEquals(0, resources.size());
591        List<Resource> dataResources = data.getDataResources();
592        assertEquals(0, dataResources.size());
593        SolrInputDocument doc = data.getSolrDocument();
594        assertNotNull(doc);
595        assertEquals(8, doc.getFieldNames().size());
596        assertEquals(null, doc.getFieldValue("name"));
597        assertEquals(null, doc.getFieldValue("continent"));
598        assertEquals(1, doc.getFieldValues("language").size());
599        assertEquals("x-sil-CHN", doc.getFieldValue("language"));
600        assertEquals(null, doc.getFieldValue("country"));
601        assertEquals(null, doc.getFieldValue("organisation"));
602        assertEquals("transcription", doc.getFieldValue("genre"));
603        assertEquals("kuna", doc.getFieldValue("subject"));
604        Collection<Object> fieldValues = doc.getFieldValues("description");
605        assertEquals(3, fieldValues.size());
606        List<String> descriptions = new ArrayList(fieldValues);
607        Collections.sort(descriptions);
608        assertEquals("Channel: Talking;\n    Genre: Traditional Narrative / Story;\n    Country: Panama;\n"
609                + "    Place of Recording: Mulatuppu;\n    Event: Community Gathering;\n"
610                + "    Institutional Affiliation: University of Texas at Austin;\n    Participant Information: Political Leader;", descriptions.get(0).toString());
611        assertEquals("Test", descriptions.get(1).toString());
612        assertEquals("The one-eyed grandmother is one of many traditional Kuna stories performed "
613                + "in the Kuna gathering house. This story, performed here by Pedro Arias, combines "
614                + "European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more "
615                + "Kuna in origin. All are woven together and a moral is provided. Pedro Arias performed "
616                + "this story before a gathered audience in the morning..", descriptions.get(2).toString());
617        assertEquals("Sound", doc.getFieldValue(FacetConstants.FIELD_RESOURCE_CLASS));
618    }
619
620    @Test
621    public void testOlacMultiFacets() throws Exception {
622       
623        // make sure the mapping file for testing is used
624        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
625
626        String content = "";
627        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
628        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n";
629        content += "   <Header>\n";
630        content += "      <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n";
631        content += "   </Header>\n";
632        content += "   <Components>\n";
633        content += "      <OLAC-DcmiTerms>\n";
634        content += "         <subject olac-linguistic-field=\"testSubject\">Kuna</subject>\n";
635        content += "         <subject dcterms-type=\"LCSH\">testSubjectFallback</subject>\n";
636        content += "         <spatial dcterms-type=\"ISO3166\">testCountry1</spatial>\n";
637        content += "         <coverage dcterms-type=\"ISO3166\">testCountry2</coverage>\n";
638        content += "         <language olac-language=\"language1\">test1</language>\n";
639        content += "         <subject olac-language=\"language2\">test2</subject>\n";
640        content += "         <subject olac-language=\"language2\">test2</subject>\n";
641        content += "      </OLAC-DcmiTerms>\n";
642        content += "   </Components>\n";
643        content += "</CMD>\n";
644
645        File cmdiFile = createCmdiFile("testOlac", content);
646        CMDIDataProcessor processor = getDataParser();
647        CMDIData data = processor.process(cmdiFile);
648        SolrInputDocument doc = data.getSolrDocument();
649        assertEquals(3, doc.getFieldValues(FacetConstants.FIELD_SUBJECT).size());
650        assertTrue(doc.getFieldValues(FacetConstants.FIELD_SUBJECT).contains("kuna"));
651        assertEquals(2, doc.getFieldValues(FacetConstants.FIELD_COUNTRY).size());
652        assertTrue(doc.getFieldValues(FacetConstants.FIELD_COUNTRY).contains("testCountry1"));
653        assertTrue(doc.getFieldValues(FacetConstants.FIELD_COUNTRY).contains("testCountry2"));
654        assertEquals(2, doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).size());
655        assertTrue(doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).contains("language1"));
656        assertTrue(doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).contains("language2"));
657
658        content = "";
659        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
660        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n";
661        content += "   <Header>\n";
662        content += "      <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n";
663        content += "   </Header>\n";
664        content += "   <Components>\n";
665        content += "      <OLAC-DcmiTerms>\n";
666        content += "         <subject dcterms-type=\"LCSH\">testSubjectFallback</subject>\n";
667        content += "         <coverage dcterms-type=\"ISO3166\">testCountry2</coverage>\n";
668        content += "         <subject olac-language=\"language2\">test2</subject>\n";
669        content += "      </OLAC-DcmiTerms>\n";
670        content += "   </Components>\n";
671        content += "</CMD>\n";
672
673        cmdiFile = createCmdiFile("testOlac", content);
674        processor = getDataParser();
675        data = processor.process(cmdiFile);
676        doc = data.getSolrDocument();
677        assertEquals(2, doc.getFieldValues(FacetConstants.FIELD_SUBJECT).size());
678        assertEquals("testsubjectfallback", doc.getFieldValue(FacetConstants.FIELD_SUBJECT));
679        assertEquals(1, doc.getFieldValues(FacetConstants.FIELD_COUNTRY).size());
680        assertEquals("testCountry2", doc.getFieldValue(FacetConstants.FIELD_COUNTRY));
681        assertEquals(1, doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).size());
682        assertEquals("language2", doc.getFieldValue(FacetConstants.FIELD_LANGUAGE));
683
684        content = "";
685        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
686        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n";
687        content += "   <Header>\n";
688        content += "      <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n";
689        content += "   </Header>\n";
690        content += "   <Components>\n";
691        content += "      <OLAC-DcmiTerms>\n";
692        content += "         <subject dcterms-type=\"LCSH\">testSubjectFallback</subject>\n";
693        content += "         <subject olac-linguistic-field=\"testSubject\">Kuna</subject>\n";
694        content += "         <coverage dcterms-type=\"ISO3166\">testCountry2</coverage>\n";
695        content += "         <spatial dcterms-type=\"ISO3166\">testCountry1</spatial>\n";
696        content += "         <subject olac-language=\"language1\">test2</subject>\n";
697        content += "         <language olac-language=\"language1\">test1</language>\n";
698        content += "      </OLAC-DcmiTerms>\n";
699        content += "   </Components>\n";
700        content += "</CMD>\n";
701
702        cmdiFile = createCmdiFile("testOlac", content);
703        processor = getDataParser();
704        data = processor.process(cmdiFile);
705        doc = data.getSolrDocument();
706        assertEquals(3, doc.getFieldValues("subject").size());
707        assertEquals("testsubjectfallback", doc.getFieldValue("subject"));
708        assertEquals(2, doc.getFieldValues(FacetConstants.FIELD_COUNTRY).size());
709        assertTrue(doc.getFieldValues(FacetConstants.FIELD_COUNTRY).contains("testCountry1"));
710        assertTrue(doc.getFieldValues(FacetConstants.FIELD_COUNTRY).contains("testCountry2"));
711        assertEquals(1, doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).size());
712        assertTrue(doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).contains("language1"));
713    }
714
715    @Test
716    public void testIgnoreWhiteSpaceFacets() throws Exception {
717       
718        // make sure the mapping file for testing is used
719        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
720
721        String content = "";
722        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
723        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n";
724        content += "   <Header>\n";
725        content += "      <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n";
726        content += "   </Header>\n";
727        content += "   <Components>\n";
728        content += "      <OLAC-DcmiTerms>\n";
729        content += "         <subject olac-linguistic-field=\"\n\n\t\t\t\">Kuna</subject>\n";
730        content += "      </OLAC-DcmiTerms>\n";
731        content += "   </Components>\n";
732        content += "</CMD>\n";
733
734        File cmdiFile = createCmdiFile("testOlac", content);
735        CMDIDataProcessor processor = getDataParser();
736        CMDIData data = processor.process(cmdiFile);
737        SolrInputDocument doc = data.getSolrDocument();
738        assertTrue(doc.getFieldValues("subject").contains("kuna"));
739    }
740
741    @Test
742    public void testCountryCodesPostProcessing() throws Exception {
743       
744        // make sure the mapping file for testing is used
745        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
746
747        String content = "";
748        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
749        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n";
750        content += "   <Header>\n";
751        content += "      <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n";
752        content += "   </Header>\n";
753        content += "   <Components>\n";
754        content += "      <OLAC-DcmiTerms>\n";
755        content += "         <coverage dcterms-type=\"ISO3166\">NL</coverage>\n";
756        content += "      </OLAC-DcmiTerms>\n";
757        content += "   </Components>\n";
758        content += "</CMD>\n";
759
760        File cmdiFile = createCmdiFile("testOlac", content);
761        CMDIDataProcessor processor = getDataParser();
762        CMDIData data = processor.process(cmdiFile);
763        SolrInputDocument doc = data.getSolrDocument();
764        assertEquals("Netherlands", doc.getFieldValue(FacetConstants.FIELD_COUNTRY));
765    }
766
767    @Test
768    public void testLanguageCodesPostProcessing() throws Exception {
769       
770        // make sure the mapping file for testing is used
771        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
772
773        String content = "";
774        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
775        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n";
776        content += "   <Header>\n";
777        content += "      <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n";
778        content += "   </Header>\n";
779        content += "   <Components>\n";
780        content += "      <OLAC-DcmiTerms>\n";
781        content += "         <language olac-language=\"fr\"/>\n";
782        content += "         <language olac-language=\"spa\"/>\n";
783        content += "      </OLAC-DcmiTerms>\n";
784        content += "   </Components>\n";
785        content += "</CMD>\n";
786
787        File cmdiFile = createCmdiFile("testOlac", content);
788        CMDIDataProcessor processor = getDataParser();
789        CMDIData data = processor.process(cmdiFile);
790        SolrInputDocument doc = data.getSolrDocument();
791        Collection<Object> values = doc.getFieldValues(FacetConstants.FIELD_LANGUAGE);
792        assertEquals(2, values.size());
793        Iterator<Object> iter = values.iterator();
794        assertEquals("French", iter.next());
795        assertEquals("Spanish; Castilian", iter.next());
796    }
797
798    @Test
799    public void testOlacCollection() throws Exception {
800       
801        // make sure the mapping file for testing is used
802        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
803
804        String content = "";
805        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
806        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n";
807        content += "    xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1288172614026/xsd\">\n";
808        content += "    <Header>\n";
809        content += "        <MdCreator>dir2cmdicollection.py</MdCreator>\n";
810        content += "        <MdCreationDate>2010-10-11</MdCreationDate>\n";
811        content += "        <MdSelfLink>collection_ATILF_Resources.cmdi</MdSelfLink>\n";
812        content += "        <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n";
813        content += "    </Header>\n";
814        content += "    <Resources>\n";
815        content += "        <ResourceProxyList>\n";
816        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0001.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0001.xml.cmdi</ResourceRef></ResourceProxy>\n";
817        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0002.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0002.xml.cmdi</ResourceRef></ResourceProxy>\n";
818        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0003.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0003.xml.cmdi</ResourceRef></ResourceProxy>\n";
819        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0004.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0004.xml.cmdi</ResourceRef></ResourceProxy>\n";
820        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_a.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_a.xml.cmdi</ResourceRef></ResourceProxy>\n";
821        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_b.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_b.xml.cmdi</ResourceRef></ResourceProxy>\n";
822        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0006.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0006.xml.cmdi</ResourceRef></ResourceProxy>\n";
823        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M277.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M277.xml.cmdi</ResourceRef></ResourceProxy>\n";
824        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M592.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M592.xml.cmdi</ResourceRef></ResourceProxy>\n";
825        content += "        </ResourceProxyList>\n";
826        content += "        <JournalFileProxyList/>\n";
827        content += "        <ResourceRelationList/>\n";
828        content += "    </Resources>\n";
829        content += "    <Components>\n";
830        content += "        <olac></olac>\n";
831        content += "    </Components>\n";
832        content += "</CMD>\n";
833
834        File cmdiFile = createCmdiFile("testOlac", content);
835        CMDIDataProcessor processor = getDataParser();
836        CMDIData data = processor.process(cmdiFile);
837        assertEquals("collection_ATILF_Resources.cmdi", data.getId());
838        List<Resource> resources = data.getMetadataResources();
839        assertEquals(9, resources.size());
840        Resource res = resources.get(0);
841        assertEquals("ATILF_Resources/0/oai_atilf_inalf_fr_0001.xml.cmdi", res.getResourceName());
842        assertEquals(null, res.getMimeType());
843        assertEquals(0, data.getDataResources().size());
844        SolrInputDocument doc = data.getSolrDocument();
845        assertNotNull(doc);
846        List<Resource> dataResources = data.getDataResources();
847        assertEquals(0, dataResources.size());
848    }
849
850    @Test
851    public void testLrtCollection() throws Exception {
852       
853        // make sure the mapping file for testing is used
854        VloConfig.setFacetConceptsFile("/facetConceptsTest.xml");
855
856        String content = "";
857        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
858        content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" ns0:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1289827960126/xsd\" xmlns:ns0=\"http://www.w3.org/2001/XMLSchema-instance\">\n";
859        content += "    <Header>\n";
860        content += "        <MdCreator>lrt2cmdi.py</MdCreator>\n";
861        content += "        <MdCreationDate>2010-11-17</MdCreationDate>\n";
862        content += "        <MdSelfLink>clarin.eu:lrt:433</MdSelfLink>\n";
863        content += "        <MdProfile>clarin.eu:cr1:p_1289827960126</MdProfile>\n";
864        content += "    </Header>\n";
865        content += "    <Resources>\n";
866        content += "        <ResourceProxyList />\n";
867        content += "        <JournalFileProxyList />\n";
868        content += "        <ResourceRelationList />\n";
869        content += "    </Resources>\n";
870        content += "    <Components>\n";
871        content += "        <LrtInventoryResource>\n";
872        content += "            <LrtCommon>\n";
873        content += "                <ResourceName>Corpus of Present-day Written Estonian</ResourceName>\n";
874        content += "                <ResourceType>Written Corpus</ResourceType>\n";
875        content += "                <LanguagesOther />\n";
876        content += "                <Description>written general; 95 mio words; TEI/SGML</Description>\n";
877        content += "                <ContactPerson>Kadri.Muischnek@ut.ee</ContactPerson>\n";
878        content += "                <Format />\n";
879        content += "                <Institute>Test</Institute>\n";
880        content += "                <MetadataLink />\n";
881        content += "                <Publications />\n";
882        content += "                <ReadilyAvailable>true</ReadilyAvailable>\n";
883        content += "                <ReferenceLink />         \n";
884        content += "                <Languages><ISO639><iso-639-3-code>est</iso-639-3-code></ISO639></Languages>\n";
885        content += "                <Countries><Country><Code>EE</Code></Country></Countries>\n";
886        content += "            </LrtCommon>\n";
887        content += "       </LrtInventoryResource>\n";
888        content += "    </Components>\n";
889        content += "</CMD>\n";
890
891        File cmdiFile = createCmdiFile("testOlac", content);
892        CMDIDataProcessor processor = getDataParser();
893        CMDIData data = processor.process(cmdiFile);
894        assertEquals("clarin.eu:lrt:433", data.getId());
895        List<Resource> resources = data.getMetadataResources();
896        assertEquals(0, resources.size());
897        List<Resource> dataResources = data.getDataResources();
898        assertEquals(0, dataResources.size());
899        SolrInputDocument doc = data.getSolrDocument();
900        assertNotNull(doc);
901        assertEquals(9, doc.getFieldNames().size());
902        assertEquals("Corpus of Present-day Written Estonian", doc.getFieldValue("name"));
903        assertEquals(null, doc.getFieldValue("continent"));
904        assertEquals(1, doc.getFieldValues("language").size());
905        assertEquals("Estonian", doc.getFieldValue("language"));
906        assertEquals("Estonia", doc.getFieldValue("country"));
907        assertEquals("Test", doc.getFieldValue("organisation"));
908        assertEquals(null, doc.getFieldValue("year"));
909        assertEquals(null, doc.getFieldValue("genre"));
910        assertEquals("written general; 95 mio words; TEI/SGML", doc.getFieldValue("description"));
911        assertEquals("Written Corpus", doc.getFieldValue(FacetConstants.FIELD_RESOURCE_CLASS));
912    }
913}
Note: See TracBrowser for help on using the repository browser.