1 | package eu.clarin.cmdi.vlo.importer; |
---|
2 | |
---|
3 | import eu.clarin.cmdi.vlo.FacetConstants; |
---|
4 | import eu.clarin.cmdi.vlo.config.VloConfig; |
---|
5 | import java.io.File; |
---|
6 | import java.util.ArrayList; |
---|
7 | import java.util.Collection; |
---|
8 | import java.util.Collections; |
---|
9 | import java.util.Iterator; |
---|
10 | import java.util.List; |
---|
11 | import org.apache.solr.common.SolrInputDocument; |
---|
12 | import static org.junit.Assert.assertEquals; |
---|
13 | import static org.junit.Assert.assertNotNull; |
---|
14 | import static org.junit.Assert.assertTrue; |
---|
15 | import org.junit.Before; |
---|
16 | import org.junit.Test; |
---|
17 | |
---|
18 | public class CMDIDataProcessorTest extends ImporterTestcase { |
---|
19 | |
---|
20 | private CMDIDataProcessor getDataParser() { |
---|
21 | return new CMDIParserVTDXML(MetadataImporter.POST_PROCESSORS); |
---|
22 | } |
---|
23 | |
---|
24 | @Test |
---|
25 | public void testCreateCMDIDataFromCorpus() throws Exception { |
---|
26 | |
---|
27 | // make sure the mapping file for testing is used |
---|
28 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
29 | |
---|
30 | String content = ""; |
---|
31 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
32 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n"; |
---|
33 | content += " <Header>\n"; |
---|
34 | content += " <MdCreationDate>2003-01-14</MdCreationDate>\n"; |
---|
35 | content += " <MdSelfLink>test-hdl:1839/00-0000-0000-0000-0001-D</MdSelfLink>\n"; |
---|
36 | content += " <MdProfile>clarin.eu:cr1:p_1274880881885</MdProfile>\n"; |
---|
37 | content += " </Header>\n"; |
---|
38 | content += " <Resources>\n"; |
---|
39 | content += " <ResourceProxyList>\n"; |
---|
40 | content += " <ResourceProxy id=\"d28635e19\">\n"; |
---|
41 | content += " <ResourceType>Metadata</ResourceType>\n"; |
---|
42 | content += " <ResourceRef>../acqui_data/Corpusstructure/acqui.imdi.cmdi</ResourceRef>\n"; |
---|
43 | content += " </ResourceProxy>\n"; |
---|
44 | content += " <ResourceProxy id=\"d28635e23\">\n"; |
---|
45 | content += " <ResourceType>Metadata</ResourceType>\n"; |
---|
46 | content += " <ResourceRef>../Comprehension/Corpusstructure/comprehension.imdi.cmdi</ResourceRef>\n"; |
---|
47 | content += " </ResourceProxy>\n"; |
---|
48 | content += " <ResourceProxy id=\"d28635e26\">\n"; |
---|
49 | content += " <ResourceType>Metadata</ResourceType>\n"; |
---|
50 | content += " <ResourceRef>../lac_data/Corpusstructure/lac.imdi.cmdi</ResourceRef>\n"; |
---|
51 | content += " </ResourceProxy>\n"; |
---|
52 | content += " </ResourceProxyList>\n"; |
---|
53 | content += " <JournalFileProxyList/>\n"; |
---|
54 | content += " <ResourceRelationList/>\n"; |
---|
55 | content += " </Resources>\n"; |
---|
56 | content += " <Components>\n"; |
---|
57 | content += " <imdi-corpus>\n"; |
---|
58 | content += " <Corpus>\n"; |
---|
59 | content += " <Name>MPI corpora</Name>\n"; |
---|
60 | content += " <Title>Corpora of the Max-Planck Institute for Psycholinguistics</Title>\n"; |
---|
61 | content += " <CorpusLink Name=\"Acquisition\">../acqui_data/Corpusstructure/acqui.imdi</CorpusLink>\n"; |
---|
62 | content += " <CorpusLink Name=\"Comprehension\">../Comprehension/Corpusstructure/comprehension.imdi</CorpusLink>\n"; |
---|
63 | content += " <CorpusLink Name=\"Language and Cognition\">../lac_data/Corpusstructure/lac.imdi</CorpusLink>\n"; |
---|
64 | content += " <descriptions>\n"; |
---|
65 | content += " <Description LanguageId=\"\">IMDI corpora</Description>\n"; |
---|
66 | content += " <Description LanguageId=\"\"/>\n"; |
---|
67 | content += " </descriptions>\n"; |
---|
68 | content += " </Corpus>\n"; |
---|
69 | content += " </imdi-corpus>\n"; |
---|
70 | content += " </Components>\n"; |
---|
71 | content += "</CMD>\n"; |
---|
72 | File cmdiFile = createCmdiFile("testCorpus", content); |
---|
73 | CMDIDataProcessor processor = getDataParser(); |
---|
74 | CMDIData data = processor.process(cmdiFile); |
---|
75 | assertEquals("test-hdl:1839/00-0000-0000-0000-0001-D", data.getId()); |
---|
76 | List<Resource> resources = data.getMetadataResources(); |
---|
77 | assertEquals(3, resources.size()); |
---|
78 | Resource res = resources.get(0); |
---|
79 | assertEquals("../acqui_data/Corpusstructure/acqui.imdi.cmdi", res.getResourceName()); |
---|
80 | assertEquals(null, res.getMimeType()); |
---|
81 | assertEquals(0, data.getDataResources().size()); |
---|
82 | SolrInputDocument doc = data.getSolrDocument(); |
---|
83 | // TODO FIX bad test case. Depends on the presence of an internet connection! (BAD!) |
---|
84 | assertTrue(doc.getFieldValues(FacetConstants.FIELD_CLARIN_PROFILE).contains("imdi-corpus")); |
---|
85 | assertNotNull(doc); |
---|
86 | } |
---|
87 | |
---|
88 | @Test |
---|
89 | public void testCreateCMDIDataFromSession() throws Exception { |
---|
90 | |
---|
91 | // make sure the mapping file for testing is used |
---|
92 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
93 | |
---|
94 | String content = ""; |
---|
95 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
96 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; |
---|
97 | content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n"; |
---|
98 | content += " <Header>\n"; |
---|
99 | content += " <MdCreationDate>2008-05-27</MdCreationDate>\n"; |
---|
100 | content += " <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n"; |
---|
101 | content += " <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n"; |
---|
102 | content += " </Header>\n"; |
---|
103 | content += " <Resources>\n"; |
---|
104 | content += " <ResourceProxyList>\n"; |
---|
105 | content += " <ResourceProxy id=\"d314e408\">\n"; |
---|
106 | content += " <ResourceType mimetype=\"video/x-mpeg1\" >Resource</ResourceType>\n"; |
---|
107 | content += " <ResourceRef>../Media/elan-example1.mpg</ResourceRef>\n"; |
---|
108 | content += " </ResourceProxy>\n"; |
---|
109 | content += " <ResourceProxy id=\"d314e471\">\n"; |
---|
110 | content += " <ResourceType mimetype=\"audio/mpeg\" >Resource</ResourceType>\n"; |
---|
111 | content += " <ResourceRef>../Media/elan-example1.mp3</ResourceRef>\n"; |
---|
112 | content += " </ResourceProxy>\n"; |
---|
113 | content += " </ResourceProxyList>\n"; |
---|
114 | content += " <JournalFileProxyList/>\n"; |
---|
115 | content += " <ResourceRelationList/>\n"; |
---|
116 | content += " </Resources>\n"; |
---|
117 | content += " <Components>\n"; |
---|
118 | content += " <Session>\n"; |
---|
119 | content += " <Name>kleve-route</Name>\n"; |
---|
120 | content += " <Title>route description to Kleve</Title>\n"; |
---|
121 | content += " <Date>2002-10-30</Date>\n"; |
---|
122 | content += " <descriptions>\n"; |
---|
123 | content += " <Description LanguageId=\"ISO639-2:eng\">This recording was made to generate a freely available test resource including speech and gestures. The annotations were created by Peter and Kita who is gesture researcher at the MPI for Psycholinguistics.</Description>\n"; |
---|
124 | content += " <Description LanguageId=\"ISO639-2:ger\">Diese Aufnahme wurde erzeugt, um eine frei verf\\u00fcgbare Test Resource zur Verf\\u00fcgung stellen zu k\\u00f6nnen, die Sprache und Gestik umfasst. Die Annotationen wurden von Peter und Kita, dem Gestik Researcher am MPI erzeugt.</Description>\n"; |
---|
125 | content += " </descriptions>\n"; |
---|
126 | content += " <MDGroup>\n"; |
---|
127 | content += " <Location>\n"; |
---|
128 | content += " <Continent>Europe</Continent>\n"; |
---|
129 | content += " <Country>Netherlands</Country>\n"; |
---|
130 | content += " <Region/>\n"; |
---|
131 | content += " <Address>Wundtlaan 1, Nijmegen</Address>\n"; |
---|
132 | content += " </Location>\n"; |
---|
133 | content += " <Project>\n"; |
---|
134 | content += " <Name>Peter Wittenburg</Name>\n"; |
---|
135 | content += " <Title>Route description test resource</Title>\n"; |
---|
136 | content += " <Id/>\n"; |
---|
137 | content += " <Contact>\n"; |
---|
138 | content += " <Name>Peter Wittenburg</Name>\n"; |
---|
139 | content += " <Address>Wundtlaan 1, 6525 XD Nijmegen</Address>\n"; |
---|
140 | content += " <Email>peter.wittenburg@mpi.nl</Email>\n"; |
---|
141 | content += " <Organisation>Max Planck Institute for Psycholinguistics</Organisation>\n"; |
---|
142 | content += " </Contact>\n"; |
---|
143 | content += " <descriptions>\n"; |
---|
144 | content += " <Description LanguageId=\"\"/>\n"; |
---|
145 | content += " </descriptions>\n"; |
---|
146 | content += " </Project>\n"; |
---|
147 | content += " <Keys>\n"; |
---|
148 | content += " <Key Name=\"conversion.IMDI.1.9to3.0.warning\">Unknown mapping of Genre: conversation|explanation|unspecified --> ???</Key>\n"; |
---|
149 | content += " </Keys>\n"; |
---|
150 | content += " <Content>\n"; |
---|
151 | content += " <Genre>Demo</Genre>\n"; |
---|
152 | content += " <SubGenre>Unspecified</SubGenre>\n"; |
---|
153 | content += " <Task>route description</Task>\n"; |
---|
154 | content += " <Modalities>Speech; Gestures</Modalities>\n"; |
---|
155 | content += " <CommunicationContext>\n"; |
---|
156 | content += " <Interactivity>interactive</Interactivity>\n"; |
---|
157 | content += " <PlanningType>semi-spontaneous</PlanningType>\n"; |
---|
158 | content += " <Involvement>elicited</Involvement>\n"; |
---|
159 | content += " <SocialContext>Unspecified</SocialContext>\n"; |
---|
160 | content += " <EventStructure>Unspecified</EventStructure>\n"; |
---|
161 | content += " <Channel>Unspecified</Channel>\n"; |
---|
162 | content += " </CommunicationContext>\n"; |
---|
163 | content += " <Content_Languages>\n"; |
---|
164 | content += " <descriptions>\n"; |
---|
165 | content += " <Description LanguageId=\"\"/>\n"; |
---|
166 | content += " </descriptions>\n"; |
---|
167 | content += " <Content_Language>\n"; |
---|
168 | content += " <Id>ISO639-3:eng</Id>\n"; |
---|
169 | content += " <Name>English</Name>\n"; |
---|
170 | content += " <descriptions>\n"; |
---|
171 | content += " <Description LanguageId=\"\"/>\n"; |
---|
172 | content += " </descriptions>\n"; |
---|
173 | content += " </Content_Language>\n"; |
---|
174 | content += " </Content_Languages>\n"; |
---|
175 | content += " <Keys>\n"; |
---|
176 | content += " <Key Name=\"IMDI__1_9.Interactional\">conversation</Key>\n"; |
---|
177 | content += " <Key Name=\"IMDI__1_9.Discursive\">explanation</Key>\n"; |
---|
178 | content += " <Key Name=\"IMDI__1_9.Interactional\">Unspecified</Key>\n"; |
---|
179 | content += " </Keys>\n"; |
---|
180 | content += " <descriptions>\n"; |
---|
181 | content += " <Description LanguageId=\"ISO639:eng\">This file was generated from an IMDI 1.9 file and transformed to IMDI 3.0. The substructure of Genre is replaced by two elements named \"Genre\" and \"SubGenre\". The original content of Genre substructure was: Interactional = 'conversation', Discursive = 'explanation', Performance = 'Unspecified'. These values have been added as Keys to the Content information.</Description>\n"; |
---|
182 | content += " <Description LanguageId=\"ISO639:eng\">Peter explains how to come from Nijmegen to Kleve by car, such that Kita would be able to get there.</Description>\n"; |
---|
183 | content += " </descriptions>\n"; |
---|
184 | content += " </Content>\n"; |
---|
185 | content += " <Actors>\n"; |
---|
186 | content += " <descriptions>\n"; |
---|
187 | content += " <Description LanguageId=\"\"/>\n"; |
---|
188 | content += " </descriptions>\n"; |
---|
189 | content += " <Actor>\n"; |
---|
190 | content += " <Role>interviewee</Role>\n"; |
---|
191 | content += " <Name>Peter</Name>\n"; |
---|
192 | content += " <FullName>Peter Wittenburg</FullName>\n"; |
---|
193 | content += " <Code>W</Code>\n"; |
---|
194 | content += " <FamilySocialRole>Unspecified</FamilySocialRole>\n"; |
---|
195 | content += " <EthnicGroup/>\n"; |
---|
196 | content += " <Age>Unknown</Age>\n"; |
---|
197 | content += " <BirthDate>Unspecified</BirthDate>\n"; |
---|
198 | content += " <Sex>Unknown</Sex>\n"; |
---|
199 | content += " <Education>university</Education>\n"; |
---|
200 | content += " <Anonymized>true</Anonymized>\n"; |
---|
201 | content += " <Contact>\n"; |
---|
202 | content += " <Name/>\n"; |
---|
203 | content += " <Address/>\n"; |
---|
204 | content += " <Email/>\n"; |
---|
205 | content += " <Organisation/>\n"; |
---|
206 | content += " </Contact>\n"; |
---|
207 | content += " <Keys/>\n"; |
---|
208 | content += " <descriptions>\n"; |
---|
209 | content += " <Description LanguageId=\"\"/>\n"; |
---|
210 | content += " </descriptions>\n"; |
---|
211 | content += " <Actor_Languages>\n"; |
---|
212 | content += " <descriptions>\n"; |
---|
213 | content += " <Description LanguageId=\"\"/>\n"; |
---|
214 | content += " </descriptions>\n"; |
---|
215 | content += " <Actor_Language>\n"; |
---|
216 | content += " <Id>ISO639-3:nld</Id>\n"; |
---|
217 | content += " <Name>Dutch</Name>\n"; |
---|
218 | content += " <descriptions>\n"; |
---|
219 | content += " <Description LanguageId=\"\"/>\n"; |
---|
220 | content += " </descriptions>\n"; |
---|
221 | content += " </Actor_Language>\n"; |
---|
222 | content += " <Actor_Language>\n"; |
---|
223 | content += " <Id>ISO639-3:deu</Id>\n"; |
---|
224 | content += " <Name>German</Name>\n"; |
---|
225 | content += " <descriptions>\n"; |
---|
226 | content += " <Description LanguageId=\"\"/>\n"; |
---|
227 | content += " </descriptions>\n"; |
---|
228 | content += " </Actor_Language>\n"; |
---|
229 | content += " <Actor_Language>\n"; |
---|
230 | content += " <Id>ISO639-3:eng</Id>\n"; |
---|
231 | content += " <Name>English</Name>\n"; |
---|
232 | content += " <descriptions>\n"; |
---|
233 | content += " <Description LanguageId=\"\"/>\n"; |
---|
234 | content += " </descriptions>\n"; |
---|
235 | content += " </Actor_Language>\n"; |
---|
236 | content += " </Actor_Languages>\n"; |
---|
237 | content += " </Actor>\n"; |
---|
238 | content += " <Actor>\n"; |
---|
239 | content += " <Role>interviewer</Role>\n"; |
---|
240 | content += " <Name>Kita</Name>\n"; |
---|
241 | content += " <FullName>Sotaro Kita</FullName>\n"; |
---|
242 | content += " <Code>k</Code>\n"; |
---|
243 | content += " <FamilySocialRole>Unspecified</FamilySocialRole>\n"; |
---|
244 | content += " <EthnicGroup/>\n"; |
---|
245 | content += " <Age>Unknown</Age>\n"; |
---|
246 | content += " <BirthDate>Unspecified</BirthDate>\n"; |
---|
247 | content += " <Sex>Unknown</Sex>\n"; |
---|
248 | content += " <Education>university</Education>\n"; |
---|
249 | content += " <Anonymized>true</Anonymized>\n"; |
---|
250 | content += " <Contact>\n"; |
---|
251 | content += " <Name/>\n"; |
---|
252 | content += " <Address/>\n"; |
---|
253 | content += " <Email/>\n"; |
---|
254 | content += " <Organisation/>\n"; |
---|
255 | content += " </Contact>\n"; |
---|
256 | content += " <Keys/>\n"; |
---|
257 | content += " <descriptions>\n"; |
---|
258 | content += " <Description LanguageId=\"\"/>\n"; |
---|
259 | content += " </descriptions>\n"; |
---|
260 | content += " <Actor_Languages>\n"; |
---|
261 | content += " <descriptions>\n"; |
---|
262 | content += " <Description LanguageId=\"\"/>\n"; |
---|
263 | content += " </descriptions>\n"; |
---|
264 | content += " <Actor_Language>\n"; |
---|
265 | content += " <Id>ISO639-3:eng</Id>\n"; |
---|
266 | content += " <Name>English</Name>\n"; |
---|
267 | content += " <descriptions>\n"; |
---|
268 | content += " <Description LanguageId=\"\"/>\n"; |
---|
269 | content += " </descriptions>\n"; |
---|
270 | content += " </Actor_Language>\n"; |
---|
271 | content += " <Actor_Language>\n"; |
---|
272 | content += " <Id>ISO639-3:jpn</Id>\n"; |
---|
273 | content += " <Name>Japanese</Name>\n"; |
---|
274 | content += " <descriptions>\n"; |
---|
275 | content += " <Description LanguageId=\"\"/>\n"; |
---|
276 | content += " </descriptions>\n"; |
---|
277 | content += " </Actor_Language>\n"; |
---|
278 | content += " </Actor_Languages>\n"; |
---|
279 | content += " </Actor>\n"; |
---|
280 | content += " <Actor>\n"; |
---|
281 | content += " <Role>Collector</Role>\n"; |
---|
282 | content += " <Name>Peter Wittenburg</Name>\n"; |
---|
283 | content += " <FullName>Peter Wittenburg</FullName>\n"; |
---|
284 | content += " <Code>Unspecified</Code>\n"; |
---|
285 | content += " <FamilySocialRole>Unspecified</FamilySocialRole>\n"; |
---|
286 | content += " <EthnicGroup/>\n"; |
---|
287 | content += " <Age>Unspecified</Age>\n"; |
---|
288 | content += " <BirthDate>Unspecified</BirthDate>\n"; |
---|
289 | content += " <Sex>Unspecified</Sex>\n"; |
---|
290 | content += " <Education/>\n"; |
---|
291 | content += " <Anonymized>false</Anonymized>\n"; |
---|
292 | content += " <Contact>\n"; |
---|
293 | content += " <Name>Peter Wittenburg</Name>\n"; |
---|
294 | content += " <Address>Wundtlaan 1, 6525 XD Nijmegen</Address>\n"; |
---|
295 | content += " <Email>peter.wittenburg@mpi.nl</Email>\n"; |
---|
296 | content += " <Organisation>Max-Planck-Institute for Psycholinguistics</Organisation>\n"; |
---|
297 | content += " </Contact>\n"; |
---|
298 | content += " <Keys/>\n"; |
---|
299 | content += " <descriptions>\n"; |
---|
300 | content += " <Description LanguageId=\"\"/>\n"; |
---|
301 | content += " </descriptions>\n"; |
---|
302 | content += " <Actor_Languages/>\n"; |
---|
303 | content += " </Actor>\n"; |
---|
304 | content += " </Actors>\n"; |
---|
305 | content += " </MDGroup>\n"; |
---|
306 | content += " <Resources>\n"; |
---|
307 | content += " <MediaFile ref=\"d314e408\">\n"; |
---|
308 | content += " <ResourceLink>../Media/elan-example1.mpg</ResourceLink>\n"; |
---|
309 | content += " <Type>video</Type>\n"; |
---|
310 | content += " <Format>video/x-mpeg1</Format>\n"; |
---|
311 | content += " <Size/>\n"; |
---|
312 | content += " <Quality>Unknown</Quality>\n"; |
---|
313 | content += " <RecordingConditions>excellent</RecordingConditions>\n"; |
---|
314 | content += " <TimePosition>\n"; |
---|
315 | content += " <Start>Unknown</Start>\n"; |
---|
316 | content += " <End>Unknown</End>\n"; |
---|
317 | content += " </TimePosition>\n"; |
---|
318 | content += " <Access>\n"; |
---|
319 | content += " <Availability>openly available</Availability>\n"; |
---|
320 | content += " <Date>2003-02-12</Date>\n"; |
---|
321 | content += " <Owner>MPI for Psycholinguistics</Owner>\n"; |
---|
322 | content += " <Publisher/>\n"; |
---|
323 | content += " <Contact>\n"; |
---|
324 | content += " <Name>Romuald Skiba</Name>\n"; |
---|
325 | content += " <Address/>\n"; |
---|
326 | content += " <Email/>\n"; |
---|
327 | content += " <Organisation/>\n"; |
---|
328 | content += " </Contact>\n"; |
---|
329 | content += " <descriptions>\n"; |
---|
330 | content += " <Description LanguageId=\"\"/>\n"; |
---|
331 | content += " </descriptions>\n"; |
---|
332 | content += " </Access>\n"; |
---|
333 | content += " <descriptions>\n"; |
---|
334 | content += " <Description LanguageId=\"\"/>\n"; |
---|
335 | content += " </descriptions>\n"; |
---|
336 | content += " <Keys/>\n"; |
---|
337 | content += " </MediaFile>\n"; |
---|
338 | content += " <MediaFile ref=\"d314e471\">\n"; |
---|
339 | content += " <ResourceLink>../Media/elan-example1.mp4</ResourceLink>\n"; |
---|
340 | content += " <Type>video</Type>\n"; |
---|
341 | content += " <Format>video/mp4</Format>\n"; |
---|
342 | content += " <Size/>\n"; |
---|
343 | content += " <Quality>Unknown</Quality>\n"; |
---|
344 | content += " <RecordingConditions>excellent</RecordingConditions>\n"; |
---|
345 | content += " <TimePosition>\n"; |
---|
346 | content += " <Start>Unknown</Start>\n"; |
---|
347 | content += " <End>Unknown</End>\n"; |
---|
348 | content += " </TimePosition>\n"; |
---|
349 | content += " <Access>\n"; |
---|
350 | content += " <Availability>openly available</Availability>\n"; |
---|
351 | content += " <Date>2003-02-12</Date>\n"; |
---|
352 | content += " <Owner>MPI for Psycholinguistics</Owner>\n"; |
---|
353 | content += " <Publisher/>\n"; |
---|
354 | content += " <Contact>\n"; |
---|
355 | content += " <Name>Romuald Skiba</Name>\n"; |
---|
356 | content += " <Address/>\n"; |
---|
357 | content += " <Email/>\n"; |
---|
358 | content += " <Organisation/>\n"; |
---|
359 | content += " </Contact>\n"; |
---|
360 | content += " <descriptions>\n"; |
---|
361 | content += " <Description LanguageId=\"\"/>\n"; |
---|
362 | content += " </descriptions>\n"; |
---|
363 | content += " </Access>\n"; |
---|
364 | content += " <descriptions>\n"; |
---|
365 | content += " <Description LanguageId=\"\"/>\n"; |
---|
366 | content += " </descriptions>\n"; |
---|
367 | content += " <Keys/>\n"; |
---|
368 | content += " </MediaFile>\n"; |
---|
369 | content += " </Resources>\n"; |
---|
370 | content += " <References>\n"; |
---|
371 | content += " <descriptions>\n"; |
---|
372 | content += " <Description LanguageId=\"\"/>\n"; |
---|
373 | content += " </descriptions>\n"; |
---|
374 | content += " </References>\n"; |
---|
375 | content += " </Session>\n"; |
---|
376 | content += " </Components>\n"; |
---|
377 | content += "</CMD>\n"; |
---|
378 | File cmdiFile = createCmdiFile("testSession", content); |
---|
379 | CMDIDataProcessor processor = getDataParser(); |
---|
380 | CMDIData data = processor.process(cmdiFile); |
---|
381 | assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId()); |
---|
382 | List<Resource> resources = data.getMetadataResources(); |
---|
383 | assertEquals(0, resources.size()); |
---|
384 | List<Resource> dataResources = data.getDataResources(); |
---|
385 | assertEquals(2, dataResources.size()); |
---|
386 | Resource res = dataResources.get(0); |
---|
387 | assertEquals("../Media/elan-example1.mpg", res.getResourceName()); |
---|
388 | assertEquals("video/x-mpeg1", res.getMimeType()); |
---|
389 | res = dataResources.get(1); |
---|
390 | assertEquals("../Media/elan-example1.mp3", res.getResourceName()); |
---|
391 | assertEquals("audio/mpeg", res.getMimeType()); |
---|
392 | SolrInputDocument doc = data.getSolrDocument(); |
---|
393 | assertNotNull(doc); |
---|
394 | assertEquals(14, doc.getFieldNames().size()); |
---|
395 | assertEquals("kleve-route", doc.getFieldValue("name")); |
---|
396 | assertEquals("Peter Wittenburg", doc.getFieldValue(FacetConstants.FIELD_PROJECT_NAME)); |
---|
397 | assertEquals("Europe", doc.getFieldValue("continent")); |
---|
398 | assertEquals("English", doc.getFieldValue("language")); |
---|
399 | assertEquals("Netherlands", doc.getFieldValue("country")); |
---|
400 | assertEquals("Max Planck Institute for Psycholinguistics", doc.getFieldValue("organisation")); |
---|
401 | assertEquals("demo", doc.getFieldValue("genre")); |
---|
402 | assertEquals( |
---|
403 | "This recording was made to generate a freely available test resource including speech and gestures. The annotations were created by Peter and Kita who is gesture researcher at the MPI for Psycholinguistics.", |
---|
404 | doc.getFieldValue("description")); |
---|
405 | assertEquals("2002", doc.getFieldValue("year")); |
---|
406 | List<String> fieldValues = new ArrayList(doc.getFieldValues(FacetConstants.FIELD_FORMAT)); |
---|
407 | assertEquals(2, fieldValues.size()); |
---|
408 | assertEquals("video/x-mpeg1", fieldValues.get(0)); |
---|
409 | assertEquals("video/mp4", fieldValues.get(1)); |
---|
410 | assertEquals(null, doc.getFieldValue("subject")); |
---|
411 | } |
---|
412 | |
---|
413 | @Test |
---|
414 | public void testCreateCMDISessionSmall() throws Exception { |
---|
415 | |
---|
416 | // make sure the mapping file for testing is used |
---|
417 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
418 | |
---|
419 | String content = ""; |
---|
420 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
421 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; |
---|
422 | content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n"; |
---|
423 | content += " <Header>\n"; |
---|
424 | content += " <MdCreationDate>2008-05-27</MdCreationDate>\n"; |
---|
425 | content += " <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n"; |
---|
426 | content += " <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n"; |
---|
427 | content += " </Header>\n"; |
---|
428 | content += " <Resources>\n"; |
---|
429 | content += " </Resources>\n"; |
---|
430 | content += " <Components>\n"; |
---|
431 | content += " <Session>\n"; |
---|
432 | content += " <Name>kleve-route</Name>\n"; |
---|
433 | content += " </Session>\n"; |
---|
434 | content += " </Components>\n"; |
---|
435 | content += "</CMD>\n"; |
---|
436 | File cmdiFile = createCmdiFile("testSession", content); |
---|
437 | CMDIDataProcessor processor = getDataParser(); |
---|
438 | CMDIData data = processor.process(cmdiFile); |
---|
439 | assertEquals("kleve-route", data.getSolrDocument().getFieldValue(FacetConstants.FIELD_NAME)); |
---|
440 | } |
---|
441 | |
---|
442 | @Test |
---|
443 | public void testEmptyFieldsShouldBeNull() throws Exception { |
---|
444 | |
---|
445 | // make sure the mapping file for testing is used |
---|
446 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
447 | |
---|
448 | String content = ""; |
---|
449 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
450 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; |
---|
451 | content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n"; |
---|
452 | content += " <Header>\n"; |
---|
453 | content += " <MdCreationDate>2008-05-27</MdCreationDate>\n"; |
---|
454 | content += " <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n"; |
---|
455 | content += " <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n"; |
---|
456 | content += " </Header>\n"; |
---|
457 | content += " <Resources>\n"; |
---|
458 | content += " <ResourceProxyList>\n"; |
---|
459 | content += " </ResourceProxyList>\n"; |
---|
460 | content += " <JournalFileProxyList/>\n"; |
---|
461 | content += " <ResourceRelationList/>\n"; |
---|
462 | content += " </Resources>\n"; |
---|
463 | content += " <Components>\n"; |
---|
464 | content += " <Session>\n"; |
---|
465 | content += " <Name>kleve-route</Name>\n"; |
---|
466 | content += " <Title>route description to Kleve</Title>\n"; |
---|
467 | content += " <Date></Date>\n"; |
---|
468 | content += " <descriptions>\n"; |
---|
469 | content += " <Description LanguageId=\"ISO639-2:eng\">Test.</Description>\n"; |
---|
470 | content += " </descriptions>\n"; |
---|
471 | content += " <MDGroup>\n"; |
---|
472 | content += " <Location>\n"; |
---|
473 | content += " <Continent>Europe</Continent>\n"; |
---|
474 | content += " <Country>Netherlands</Country>\n"; |
---|
475 | content += " <Region/>\n"; |
---|
476 | content += " <Address>Wundtlaan 1, Nijmegen</Address>\n"; |
---|
477 | content += " </Location>\n"; |
---|
478 | content += " <Project>\n"; |
---|
479 | content += " <Name></Name>\n"; |
---|
480 | content += " <Title></Title>\n"; |
---|
481 | content += " <Id/>\n"; |
---|
482 | content += " <Contact>\n"; |
---|
483 | content += " <Name></Name>\n"; |
---|
484 | content += " <Address></Address>\n"; |
---|
485 | content += " <Email></Email>\n"; |
---|
486 | content += " <Organisation></Organisation>\n"; |
---|
487 | content += " </Contact>\n"; |
---|
488 | content += " <descriptions>\n"; |
---|
489 | content += " <Description LanguageId=\"\"/>\n"; |
---|
490 | content += " </descriptions>\n"; |
---|
491 | content += " </Project>\n"; |
---|
492 | content += " <Keys>\n"; |
---|
493 | content += " </Keys>\n"; |
---|
494 | content += " <Content>\n"; |
---|
495 | content += " <Genre>Demo</Genre>\n"; |
---|
496 | content += " <SubGenre>Unspecified</SubGenre>\n"; |
---|
497 | content += " <Task>route description</Task>\n"; |
---|
498 | content += " <Modalities>Speech; Gestures</Modalities>\n"; |
---|
499 | content += " <CommunicationContext>\n"; |
---|
500 | content += " </CommunicationContext>\n"; |
---|
501 | content += " <Content_Languages>\n"; |
---|
502 | content += " </Content_Languages>\n"; |
---|
503 | content += " <descriptions>\n"; |
---|
504 | content += " </descriptions>\n"; |
---|
505 | content += " </Content>\n"; |
---|
506 | content += " <Actors>\n"; |
---|
507 | content += " </Actors>\n"; |
---|
508 | content += " </MDGroup>\n"; |
---|
509 | content += " <Resources>\n"; |
---|
510 | content += " </Resources>\n"; |
---|
511 | content += " </Session>\n"; |
---|
512 | content += " </Components>\n"; |
---|
513 | content += "</CMD>\n"; |
---|
514 | File cmdiFile = createCmdiFile("testSession", content); |
---|
515 | CMDIDataProcessor processor = getDataParser(); |
---|
516 | CMDIData data = processor.process(cmdiFile); |
---|
517 | assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId()); |
---|
518 | List<Resource> resources = data.getMetadataResources(); |
---|
519 | assertEquals(0, resources.size()); |
---|
520 | SolrInputDocument doc = data.getSolrDocument(); |
---|
521 | assertNotNull(doc); |
---|
522 | assertEquals(8, doc.getFieldNames().size()); |
---|
523 | assertEquals("kleve-route", doc.getFieldValue("name")); |
---|
524 | assertEquals("Europe", doc.getFieldValue("continent")); |
---|
525 | assertEquals("Netherlands", doc.getFieldValue("country")); |
---|
526 | assertEquals("demo", doc.getFieldValue("genre")); |
---|
527 | assertEquals("Test.", doc.getFieldValue("description")); |
---|
528 | assertEquals("Should be null not empty string", null, doc.getFieldValue("organisation")); |
---|
529 | assertEquals(null, doc.getFieldValue("language")); |
---|
530 | assertEquals(null, doc.getFieldValue("subject")); |
---|
531 | assertEquals(null, doc.getFieldValue("year")); |
---|
532 | } |
---|
533 | |
---|
534 | @Test |
---|
535 | public void testOlac() throws Exception { |
---|
536 | |
---|
537 | // make sure the mapping file for testing is used |
---|
538 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
539 | |
---|
540 | String content = ""; |
---|
541 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
542 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; |
---|
543 | content += " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"; |
---|
544 | content += " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\"\n"; |
---|
545 | content += " xmlns:defns=\"http://www.openarchives.org/OAI/2.0/\"\n"; |
---|
546 | content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1288172614026/xsd\">\n"; |
---|
547 | content += " <Header>\n"; |
---|
548 | content += " <MdCreator>olac2cmdi.xsl</MdCreator>\n"; |
---|
549 | content += " <MdCreationDate>2002-12-14</MdCreationDate>\n"; |
---|
550 | content += " <MdSelfLink>oai:ailla.utexas.edu:1</MdSelfLink>\n"; |
---|
551 | content += " <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n"; |
---|
552 | content += " </Header>\n"; |
---|
553 | content += " <Resources>\n"; |
---|
554 | content += " <ResourceProxyList/>\n"; |
---|
555 | content += " <JournalFileProxyList/>\n"; |
---|
556 | content += " <ResourceRelationList/>\n"; |
---|
557 | content += " </Resources>\n"; |
---|
558 | content += " <Components>\n"; |
---|
559 | content += " <OLAC-DcmiTerms>\n"; |
---|
560 | content += " <creator>Joel Sherzer (recorder)</creator>\n"; |
---|
561 | content += " <description>\n"; |
---|
562 | content += " Channel: Talking;\n"; |
---|
563 | content += " Genre: Traditional Narrative / Story;\n"; |
---|
564 | content += " Country: Panama;\n"; |
---|
565 | content += " Place of Recording: Mulatuppu;\n"; |
---|
566 | content += " Event: Community Gathering;\n"; |
---|
567 | content += " Institutional Affiliation: University of Texas at Austin;\n"; |
---|
568 | content += " Participant Information: Political Leader;\n"; |
---|
569 | content += " </description>\n"; |
---|
570 | content += " <description>The one-eyed grandmother is one of many traditional Kuna stories performed in the Kuna gathering house. This story, performed here by Pedro Arias, combines European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more Kuna in origin. All are woven together and a moral is provided. Pedro Arias performed this story before a gathered audience in the morning..\n"; |
---|
571 | content += " </description>\n"; |
---|
572 | content += " <description>Test</description>\n"; |
---|
573 | content += " <identifier>http://uts.cc.utexas.edu/~ailla/audio/sherzer/one_eyed_grandmother.ram</identifier>\n"; |
---|
574 | content += " <identifier>http://uts.cc.utexas.edu/~ailla/texts/sherzer/one_eyed_grandmother.pdf</identifier>\n"; |
---|
575 | content += " <language olac-language=\"x-sil-CHN\"/>\n"; |
---|
576 | content += " <language>Chinese</language>\n"; |
---|
577 | content += " <subject olac-linguistic-field=\"testSubject\">Kuna</subject>\n"; |
---|
578 | content += " <type olac-linguistic-type=\"Transcription\"/>\n"; |
---|
579 | content += " <format>WAV</format>\n"; |
---|
580 | content += " <type dcterms-type=\"DCMIType\">Sound</type>\n"; |
---|
581 | content += " </OLAC-DcmiTerms>\n"; |
---|
582 | content += " </Components>\n"; |
---|
583 | content += "</CMD>\n"; |
---|
584 | |
---|
585 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
586 | CMDIDataProcessor processor = getDataParser(); |
---|
587 | CMDIData data = processor.process(cmdiFile); |
---|
588 | assertEquals("oai:ailla.utexas.edu:1", data.getId()); |
---|
589 | List<Resource> resources = data.getMetadataResources(); |
---|
590 | assertEquals(0, resources.size()); |
---|
591 | List<Resource> dataResources = data.getDataResources(); |
---|
592 | assertEquals(0, dataResources.size()); |
---|
593 | SolrInputDocument doc = data.getSolrDocument(); |
---|
594 | assertNotNull(doc); |
---|
595 | assertEquals(8, doc.getFieldNames().size()); |
---|
596 | assertEquals(null, doc.getFieldValue("name")); |
---|
597 | assertEquals(null, doc.getFieldValue("continent")); |
---|
598 | assertEquals(1, doc.getFieldValues("language").size()); |
---|
599 | assertEquals("x-sil-CHN", doc.getFieldValue("language")); |
---|
600 | assertEquals(null, doc.getFieldValue("country")); |
---|
601 | assertEquals(null, doc.getFieldValue("organisation")); |
---|
602 | assertEquals("transcription", doc.getFieldValue("genre")); |
---|
603 | assertEquals("kuna", doc.getFieldValue("subject")); |
---|
604 | Collection<Object> fieldValues = doc.getFieldValues("description"); |
---|
605 | assertEquals(3, fieldValues.size()); |
---|
606 | List<String> descriptions = new ArrayList(fieldValues); |
---|
607 | Collections.sort(descriptions); |
---|
608 | assertEquals("Channel: Talking;\n Genre: Traditional Narrative / Story;\n Country: Panama;\n" |
---|
609 | + " Place of Recording: Mulatuppu;\n Event: Community Gathering;\n" |
---|
610 | + " Institutional Affiliation: University of Texas at Austin;\n Participant Information: Political Leader;", descriptions.get(0).toString()); |
---|
611 | assertEquals("Test", descriptions.get(1).toString()); |
---|
612 | assertEquals("The one-eyed grandmother is one of many traditional Kuna stories performed " |
---|
613 | + "in the Kuna gathering house. This story, performed here by Pedro Arias, combines " |
---|
614 | + "European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more " |
---|
615 | + "Kuna in origin. All are woven together and a moral is provided. Pedro Arias performed " |
---|
616 | + "this story before a gathered audience in the morning..", descriptions.get(2).toString()); |
---|
617 | assertEquals("Sound", doc.getFieldValue(FacetConstants.FIELD_RESOURCE_CLASS)); |
---|
618 | } |
---|
619 | |
---|
620 | @Test |
---|
621 | public void testOlacMultiFacets() throws Exception { |
---|
622 | |
---|
623 | // make sure the mapping file for testing is used |
---|
624 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
625 | |
---|
626 | String content = ""; |
---|
627 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
628 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
629 | content += " <Header>\n"; |
---|
630 | content += " <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n"; |
---|
631 | content += " </Header>\n"; |
---|
632 | content += " <Components>\n"; |
---|
633 | content += " <OLAC-DcmiTerms>\n"; |
---|
634 | content += " <subject olac-linguistic-field=\"testSubject\">Kuna</subject>\n"; |
---|
635 | content += " <subject dcterms-type=\"LCSH\">testSubjectFallback</subject>\n"; |
---|
636 | content += " <spatial dcterms-type=\"ISO3166\">testCountry1</spatial>\n"; |
---|
637 | content += " <coverage dcterms-type=\"ISO3166\">testCountry2</coverage>\n"; |
---|
638 | content += " <language olac-language=\"language1\">test1</language>\n"; |
---|
639 | content += " <subject olac-language=\"language2\">test2</subject>\n"; |
---|
640 | content += " <subject olac-language=\"language2\">test2</subject>\n"; |
---|
641 | content += " </OLAC-DcmiTerms>\n"; |
---|
642 | content += " </Components>\n"; |
---|
643 | content += "</CMD>\n"; |
---|
644 | |
---|
645 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
646 | CMDIDataProcessor processor = getDataParser(); |
---|
647 | CMDIData data = processor.process(cmdiFile); |
---|
648 | SolrInputDocument doc = data.getSolrDocument(); |
---|
649 | assertEquals(3, doc.getFieldValues(FacetConstants.FIELD_SUBJECT).size()); |
---|
650 | assertTrue(doc.getFieldValues(FacetConstants.FIELD_SUBJECT).contains("kuna")); |
---|
651 | assertEquals(2, doc.getFieldValues(FacetConstants.FIELD_COUNTRY).size()); |
---|
652 | assertTrue(doc.getFieldValues(FacetConstants.FIELD_COUNTRY).contains("testCountry1")); |
---|
653 | assertTrue(doc.getFieldValues(FacetConstants.FIELD_COUNTRY).contains("testCountry2")); |
---|
654 | assertEquals(2, doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).size()); |
---|
655 | assertTrue(doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).contains("language1")); |
---|
656 | assertTrue(doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).contains("language2")); |
---|
657 | |
---|
658 | content = ""; |
---|
659 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
660 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
661 | content += " <Header>\n"; |
---|
662 | content += " <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n"; |
---|
663 | content += " </Header>\n"; |
---|
664 | content += " <Components>\n"; |
---|
665 | content += " <OLAC-DcmiTerms>\n"; |
---|
666 | content += " <subject dcterms-type=\"LCSH\">testSubjectFallback</subject>\n"; |
---|
667 | content += " <coverage dcterms-type=\"ISO3166\">testCountry2</coverage>\n"; |
---|
668 | content += " <subject olac-language=\"language2\">test2</subject>\n"; |
---|
669 | content += " </OLAC-DcmiTerms>\n"; |
---|
670 | content += " </Components>\n"; |
---|
671 | content += "</CMD>\n"; |
---|
672 | |
---|
673 | cmdiFile = createCmdiFile("testOlac", content); |
---|
674 | processor = getDataParser(); |
---|
675 | data = processor.process(cmdiFile); |
---|
676 | doc = data.getSolrDocument(); |
---|
677 | assertEquals(2, doc.getFieldValues(FacetConstants.FIELD_SUBJECT).size()); |
---|
678 | assertEquals("testsubjectfallback", doc.getFieldValue(FacetConstants.FIELD_SUBJECT)); |
---|
679 | assertEquals(1, doc.getFieldValues(FacetConstants.FIELD_COUNTRY).size()); |
---|
680 | assertEquals("testCountry2", doc.getFieldValue(FacetConstants.FIELD_COUNTRY)); |
---|
681 | assertEquals(1, doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).size()); |
---|
682 | assertEquals("language2", doc.getFieldValue(FacetConstants.FIELD_LANGUAGE)); |
---|
683 | |
---|
684 | content = ""; |
---|
685 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
686 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
687 | content += " <Header>\n"; |
---|
688 | content += " <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n"; |
---|
689 | content += " </Header>\n"; |
---|
690 | content += " <Components>\n"; |
---|
691 | content += " <OLAC-DcmiTerms>\n"; |
---|
692 | content += " <subject dcterms-type=\"LCSH\">testSubjectFallback</subject>\n"; |
---|
693 | content += " <subject olac-linguistic-field=\"testSubject\">Kuna</subject>\n"; |
---|
694 | content += " <coverage dcterms-type=\"ISO3166\">testCountry2</coverage>\n"; |
---|
695 | content += " <spatial dcterms-type=\"ISO3166\">testCountry1</spatial>\n"; |
---|
696 | content += " <subject olac-language=\"language1\">test2</subject>\n"; |
---|
697 | content += " <language olac-language=\"language1\">test1</language>\n"; |
---|
698 | content += " </OLAC-DcmiTerms>\n"; |
---|
699 | content += " </Components>\n"; |
---|
700 | content += "</CMD>\n"; |
---|
701 | |
---|
702 | cmdiFile = createCmdiFile("testOlac", content); |
---|
703 | processor = getDataParser(); |
---|
704 | data = processor.process(cmdiFile); |
---|
705 | doc = data.getSolrDocument(); |
---|
706 | assertEquals(3, doc.getFieldValues("subject").size()); |
---|
707 | assertEquals("testsubjectfallback", doc.getFieldValue("subject")); |
---|
708 | assertEquals(2, doc.getFieldValues(FacetConstants.FIELD_COUNTRY).size()); |
---|
709 | assertTrue(doc.getFieldValues(FacetConstants.FIELD_COUNTRY).contains("testCountry1")); |
---|
710 | assertTrue(doc.getFieldValues(FacetConstants.FIELD_COUNTRY).contains("testCountry2")); |
---|
711 | assertEquals(1, doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).size()); |
---|
712 | assertTrue(doc.getFieldValues(FacetConstants.FIELD_LANGUAGE).contains("language1")); |
---|
713 | } |
---|
714 | |
---|
715 | @Test |
---|
716 | public void testIgnoreWhiteSpaceFacets() throws Exception { |
---|
717 | |
---|
718 | // make sure the mapping file for testing is used |
---|
719 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
720 | |
---|
721 | String content = ""; |
---|
722 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
723 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
724 | content += " <Header>\n"; |
---|
725 | content += " <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n"; |
---|
726 | content += " </Header>\n"; |
---|
727 | content += " <Components>\n"; |
---|
728 | content += " <OLAC-DcmiTerms>\n"; |
---|
729 | content += " <subject olac-linguistic-field=\"\n\n\t\t\t\">Kuna</subject>\n"; |
---|
730 | content += " </OLAC-DcmiTerms>\n"; |
---|
731 | content += " </Components>\n"; |
---|
732 | content += "</CMD>\n"; |
---|
733 | |
---|
734 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
735 | CMDIDataProcessor processor = getDataParser(); |
---|
736 | CMDIData data = processor.process(cmdiFile); |
---|
737 | SolrInputDocument doc = data.getSolrDocument(); |
---|
738 | assertTrue(doc.getFieldValues("subject").contains("kuna")); |
---|
739 | } |
---|
740 | |
---|
741 | @Test |
---|
742 | public void testCountryCodesPostProcessing() throws Exception { |
---|
743 | |
---|
744 | // make sure the mapping file for testing is used |
---|
745 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
746 | |
---|
747 | String content = ""; |
---|
748 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
749 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
750 | content += " <Header>\n"; |
---|
751 | content += " <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n"; |
---|
752 | content += " </Header>\n"; |
---|
753 | content += " <Components>\n"; |
---|
754 | content += " <OLAC-DcmiTerms>\n"; |
---|
755 | content += " <coverage dcterms-type=\"ISO3166\">NL</coverage>\n"; |
---|
756 | content += " </OLAC-DcmiTerms>\n"; |
---|
757 | content += " </Components>\n"; |
---|
758 | content += "</CMD>\n"; |
---|
759 | |
---|
760 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
761 | CMDIDataProcessor processor = getDataParser(); |
---|
762 | CMDIData data = processor.process(cmdiFile); |
---|
763 | SolrInputDocument doc = data.getSolrDocument(); |
---|
764 | assertEquals("Netherlands", doc.getFieldValue(FacetConstants.FIELD_COUNTRY)); |
---|
765 | } |
---|
766 | |
---|
767 | @Test |
---|
768 | public void testLanguageCodesPostProcessing() throws Exception { |
---|
769 | |
---|
770 | // make sure the mapping file for testing is used |
---|
771 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
772 | |
---|
773 | String content = ""; |
---|
774 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
775 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
776 | content += " <Header>\n"; |
---|
777 | content += " <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n"; |
---|
778 | content += " </Header>\n"; |
---|
779 | content += " <Components>\n"; |
---|
780 | content += " <OLAC-DcmiTerms>\n"; |
---|
781 | content += " <language olac-language=\"fr\"/>\n"; |
---|
782 | content += " <language olac-language=\"spa\"/>\n"; |
---|
783 | content += " </OLAC-DcmiTerms>\n"; |
---|
784 | content += " </Components>\n"; |
---|
785 | content += "</CMD>\n"; |
---|
786 | |
---|
787 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
788 | CMDIDataProcessor processor = getDataParser(); |
---|
789 | CMDIData data = processor.process(cmdiFile); |
---|
790 | SolrInputDocument doc = data.getSolrDocument(); |
---|
791 | Collection<Object> values = doc.getFieldValues(FacetConstants.FIELD_LANGUAGE); |
---|
792 | assertEquals(2, values.size()); |
---|
793 | Iterator<Object> iter = values.iterator(); |
---|
794 | assertEquals("French", iter.next()); |
---|
795 | assertEquals("Spanish; Castilian", iter.next()); |
---|
796 | } |
---|
797 | |
---|
798 | @Test |
---|
799 | public void testOlacCollection() throws Exception { |
---|
800 | |
---|
801 | // make sure the mapping file for testing is used |
---|
802 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
803 | |
---|
804 | String content = ""; |
---|
805 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
806 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; |
---|
807 | content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1288172614026/xsd\">\n"; |
---|
808 | content += " <Header>\n"; |
---|
809 | content += " <MdCreator>dir2cmdicollection.py</MdCreator>\n"; |
---|
810 | content += " <MdCreationDate>2010-10-11</MdCreationDate>\n"; |
---|
811 | content += " <MdSelfLink>collection_ATILF_Resources.cmdi</MdSelfLink>\n"; |
---|
812 | content += " <MdProfile>clarin.eu:cr1:p_1288172614026</MdProfile>\n"; |
---|
813 | content += " </Header>\n"; |
---|
814 | content += " <Resources>\n"; |
---|
815 | content += " <ResourceProxyList>\n"; |
---|
816 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0001.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0001.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
817 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0002.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0002.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
818 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0003.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0003.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
819 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0004.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0004.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
820 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_a.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_a.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
821 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_b.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_b.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
822 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0006.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0006.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
823 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M277.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M277.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
824 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M592.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M592.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
825 | content += " </ResourceProxyList>\n"; |
---|
826 | content += " <JournalFileProxyList/>\n"; |
---|
827 | content += " <ResourceRelationList/>\n"; |
---|
828 | content += " </Resources>\n"; |
---|
829 | content += " <Components>\n"; |
---|
830 | content += " <olac></olac>\n"; |
---|
831 | content += " </Components>\n"; |
---|
832 | content += "</CMD>\n"; |
---|
833 | |
---|
834 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
835 | CMDIDataProcessor processor = getDataParser(); |
---|
836 | CMDIData data = processor.process(cmdiFile); |
---|
837 | assertEquals("collection_ATILF_Resources.cmdi", data.getId()); |
---|
838 | List<Resource> resources = data.getMetadataResources(); |
---|
839 | assertEquals(9, resources.size()); |
---|
840 | Resource res = resources.get(0); |
---|
841 | assertEquals("ATILF_Resources/0/oai_atilf_inalf_fr_0001.xml.cmdi", res.getResourceName()); |
---|
842 | assertEquals(null, res.getMimeType()); |
---|
843 | assertEquals(0, data.getDataResources().size()); |
---|
844 | SolrInputDocument doc = data.getSolrDocument(); |
---|
845 | assertNotNull(doc); |
---|
846 | List<Resource> dataResources = data.getDataResources(); |
---|
847 | assertEquals(0, dataResources.size()); |
---|
848 | } |
---|
849 | |
---|
850 | @Test |
---|
851 | public void testLrtCollection() throws Exception { |
---|
852 | |
---|
853 | // make sure the mapping file for testing is used |
---|
854 | VloConfig.setFacetConceptsFile("/facetConceptsTest.xml"); |
---|
855 | |
---|
856 | String content = ""; |
---|
857 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
858 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" ns0:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1289827960126/xsd\" xmlns:ns0=\"http://www.w3.org/2001/XMLSchema-instance\">\n"; |
---|
859 | content += " <Header>\n"; |
---|
860 | content += " <MdCreator>lrt2cmdi.py</MdCreator>\n"; |
---|
861 | content += " <MdCreationDate>2010-11-17</MdCreationDate>\n"; |
---|
862 | content += " <MdSelfLink>clarin.eu:lrt:433</MdSelfLink>\n"; |
---|
863 | content += " <MdProfile>clarin.eu:cr1:p_1289827960126</MdProfile>\n"; |
---|
864 | content += " </Header>\n"; |
---|
865 | content += " <Resources>\n"; |
---|
866 | content += " <ResourceProxyList />\n"; |
---|
867 | content += " <JournalFileProxyList />\n"; |
---|
868 | content += " <ResourceRelationList />\n"; |
---|
869 | content += " </Resources>\n"; |
---|
870 | content += " <Components>\n"; |
---|
871 | content += " <LrtInventoryResource>\n"; |
---|
872 | content += " <LrtCommon>\n"; |
---|
873 | content += " <ResourceName>Corpus of Present-day Written Estonian</ResourceName>\n"; |
---|
874 | content += " <ResourceType>Written Corpus</ResourceType>\n"; |
---|
875 | content += " <LanguagesOther />\n"; |
---|
876 | content += " <Description>written general; 95 mio words; TEI/SGML</Description>\n"; |
---|
877 | content += " <ContactPerson>Kadri.Muischnek@ut.ee</ContactPerson>\n"; |
---|
878 | content += " <Format />\n"; |
---|
879 | content += " <Institute>Test</Institute>\n"; |
---|
880 | content += " <MetadataLink />\n"; |
---|
881 | content += " <Publications />\n"; |
---|
882 | content += " <ReadilyAvailable>true</ReadilyAvailable>\n"; |
---|
883 | content += " <ReferenceLink /> \n"; |
---|
884 | content += " <Languages><ISO639><iso-639-3-code>est</iso-639-3-code></ISO639></Languages>\n"; |
---|
885 | content += " <Countries><Country><Code>EE</Code></Country></Countries>\n"; |
---|
886 | content += " </LrtCommon>\n"; |
---|
887 | content += " </LrtInventoryResource>\n"; |
---|
888 | content += " </Components>\n"; |
---|
889 | content += "</CMD>\n"; |
---|
890 | |
---|
891 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
892 | CMDIDataProcessor processor = getDataParser(); |
---|
893 | CMDIData data = processor.process(cmdiFile); |
---|
894 | assertEquals("clarin.eu:lrt:433", data.getId()); |
---|
895 | List<Resource> resources = data.getMetadataResources(); |
---|
896 | assertEquals(0, resources.size()); |
---|
897 | List<Resource> dataResources = data.getDataResources(); |
---|
898 | assertEquals(0, dataResources.size()); |
---|
899 | SolrInputDocument doc = data.getSolrDocument(); |
---|
900 | assertNotNull(doc); |
---|
901 | assertEquals(9, doc.getFieldNames().size()); |
---|
902 | assertEquals("Corpus of Present-day Written Estonian", doc.getFieldValue("name")); |
---|
903 | assertEquals(null, doc.getFieldValue("continent")); |
---|
904 | assertEquals(1, doc.getFieldValues("language").size()); |
---|
905 | assertEquals("Estonian", doc.getFieldValue("language")); |
---|
906 | assertEquals("Estonia", doc.getFieldValue("country")); |
---|
907 | assertEquals("Test", doc.getFieldValue("organisation")); |
---|
908 | assertEquals(null, doc.getFieldValue("year")); |
---|
909 | assertEquals(null, doc.getFieldValue("genre")); |
---|
910 | assertEquals("written general; 95 mio words; TEI/SGML", doc.getFieldValue("description")); |
---|
911 | assertEquals("Written Corpus", doc.getFieldValue(FacetConstants.FIELD_RESOURCE_CLASS)); |
---|
912 | } |
---|
913 | } |
---|