1 | package eu.clarin.cmdi.vlo.importer; |
---|
2 | |
---|
3 | import static org.junit.Assert.assertEquals; |
---|
4 | import static org.junit.Assert.assertNotNull; |
---|
5 | import static org.junit.Assert.assertNull; |
---|
6 | |
---|
7 | import java.io.File; |
---|
8 | import java.util.ArrayList; |
---|
9 | import java.util.Collection; |
---|
10 | import java.util.Collections; |
---|
11 | import java.util.List; |
---|
12 | |
---|
13 | import org.apache.solr.common.SolrInputDocument; |
---|
14 | import org.junit.Test; |
---|
15 | |
---|
16 | public class CMDIDataProcessorTest extends ImporterTestcase { |
---|
17 | |
---|
18 | private CMDIDataProcessor getDataParser(FacetMapping map) { |
---|
19 | return new CMDIParserVTDXML(map); |
---|
20 | // return new CMDIDigester(map); |
---|
21 | } |
---|
22 | |
---|
23 | @Test |
---|
24 | public void testCreateCMDIDataFromCorpus() throws Exception { |
---|
25 | String content = ""; |
---|
26 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
27 | content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n"; |
---|
28 | content += " <Header>\n"; |
---|
29 | content += " <MdCreationDate>2003-01-14</MdCreationDate>\n"; |
---|
30 | content += " <MdSelfLink>test-hdl:1839/00-0000-0000-0000-0001-D</MdSelfLink>\n"; |
---|
31 | content += " <MdProfile>clarin.eu:cr1:p_1274880881885</MdProfile>\n"; |
---|
32 | content += " </Header>\n"; |
---|
33 | content += " <Resources>\n"; |
---|
34 | content += " <ResourceProxyList>\n"; |
---|
35 | content += " <ResourceProxy id=\"d28635e19\">\n"; |
---|
36 | content += " <ResourceType>Metadata</ResourceType>\n"; |
---|
37 | content += " <ResourceRef>../acqui_data/Corpusstructure/acqui.imdi.cmdi</ResourceRef>\n"; |
---|
38 | content += " </ResourceProxy>\n"; |
---|
39 | content += " <ResourceProxy id=\"d28635e23\">\n"; |
---|
40 | content += " <ResourceType>Metadata</ResourceType>\n"; |
---|
41 | content += " <ResourceRef>../Comprehension/Corpusstructure/comprehension.imdi.cmdi</ResourceRef>\n"; |
---|
42 | content += " </ResourceProxy>\n"; |
---|
43 | content += " <ResourceProxy id=\"d28635e26\">\n"; |
---|
44 | content += " <ResourceType>Metadata</ResourceType>\n"; |
---|
45 | content += " <ResourceRef>../lac_data/Corpusstructure/lac.imdi.cmdi</ResourceRef>\n"; |
---|
46 | content += " </ResourceProxy>\n"; |
---|
47 | content += " </ResourceProxyList>\n"; |
---|
48 | content += " <JournalFileProxyList/>\n"; |
---|
49 | content += " <ResourceRelationList/>\n"; |
---|
50 | content += " </Resources>\n"; |
---|
51 | content += " <Components>\n"; |
---|
52 | content += " <imdi-corpus>\n"; |
---|
53 | content += " <Corpus>\n"; |
---|
54 | content += " <Name>MPI corpora</Name>\n"; |
---|
55 | content += " <Title>Corpora of the Max-Planck Institute for Psycholinguistics</Title>\n"; |
---|
56 | content += " <CorpusLink Name=\"Acquisition\">../acqui_data/Corpusstructure/acqui.imdi</CorpusLink>\n"; |
---|
57 | content += " <CorpusLink Name=\"Comprehension\">../Comprehension/Corpusstructure/comprehension.imdi</CorpusLink>\n"; |
---|
58 | content += " <CorpusLink Name=\"Language and Cognition\">../lac_data/Corpusstructure/lac.imdi</CorpusLink>\n"; |
---|
59 | content += " <descriptions>\n"; |
---|
60 | content += " <Description LanguageId=\"\">IMDI corpora</Description>\n"; |
---|
61 | content += " <Description LanguageId=\"\"/>\n"; |
---|
62 | content += " </descriptions>\n"; |
---|
63 | content += " </Corpus>\n"; |
---|
64 | content += " </imdi-corpus>\n"; |
---|
65 | content += " </Components>\n"; |
---|
66 | content += "</CMD>\n"; |
---|
67 | File cmdiFile = createCmdiFile("testCorpus", content); |
---|
68 | CMDIDataProcessor processor = getDataParser(getIMDIFacetMap()); |
---|
69 | CMDIData data = processor.process(cmdiFile); |
---|
70 | assertEquals("test-hdl:1839/00-0000-0000-0000-0001-D", data.getId()); |
---|
71 | List<Resource> resources = data.getMetadataResources(); |
---|
72 | assertEquals(3, resources.size()); |
---|
73 | Resource res = resources.get(0); |
---|
74 | assertEquals("../acqui_data/Corpusstructure/acqui.imdi.cmdi", res.getResourceName()); |
---|
75 | assertEquals(null, res.getMimeType()); |
---|
76 | assertEquals(0, data.getDataResources().size()); |
---|
77 | SolrInputDocument doc = data.getSolrDocument(); |
---|
78 | assertNull(doc); |
---|
79 | } |
---|
80 | |
---|
81 | @Test |
---|
82 | public void testCreateCMDIDataFromSession() throws Exception { |
---|
83 | String content = ""; |
---|
84 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
85 | content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; |
---|
86 | content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n"; |
---|
87 | content += " <Header>\n"; |
---|
88 | content += " <MdCreationDate>2008-05-27</MdCreationDate>\n"; |
---|
89 | content += " <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n"; |
---|
90 | content += " <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n"; |
---|
91 | content += " </Header>\n"; |
---|
92 | content += " <Resources>\n"; |
---|
93 | content += " <ResourceProxyList>\n"; |
---|
94 | content += " <ResourceProxy id=\"d314e408\">\n"; |
---|
95 | content += " <ResourceType mimetype=\"video/x-mpeg1\" >Resource</ResourceType>\n"; |
---|
96 | content += " <ResourceRef>../Media/elan-example1.mpg</ResourceRef>\n"; |
---|
97 | content += " </ResourceProxy>\n"; |
---|
98 | content += " <ResourceProxy id=\"d314e471\">\n"; |
---|
99 | content += " <ResourceType mimetype=\"audio/mpeg\" >Resource</ResourceType>\n"; |
---|
100 | content += " <ResourceRef>../Media/elan-example1.mp3</ResourceRef>\n"; |
---|
101 | content += " </ResourceProxy>\n"; |
---|
102 | content += " </ResourceProxyList>\n"; |
---|
103 | content += " <JournalFileProxyList/>\n"; |
---|
104 | content += " <ResourceRelationList/>\n"; |
---|
105 | content += " </Resources>\n"; |
---|
106 | content += " <Components>\n"; |
---|
107 | content += " <Session>\n"; |
---|
108 | content += " <Name>kleve-route</Name>\n"; |
---|
109 | content += " <Title>route description to Kleve</Title>\n"; |
---|
110 | content += " <Date>2002-10-30</Date>\n"; |
---|
111 | content += " <descriptions>\n"; |
---|
112 | content += " <Description LanguageId=\"ISO639-2:eng\">This recording was made to generate a freely available test resource including speech and gestures. The annotations were created by Peter and Kita who is gesture researcher at the MPI for Psycholinguistics.</Description>\n"; |
---|
113 | content += " <Description LanguageId=\"ISO639-2:ger\">Diese Aufnahme wurde erzeugt, um eine frei verf\\u00fcgbare Test Resource zur Verf\\u00fcgung stellen zu k\\u00f6nnen, die Sprache und Gestik umfasst. Die Annotationen wurden von Peter und Kita, dem Gestik Researcher am MPI erzeugt.</Description>\n"; |
---|
114 | content += " </descriptions>\n"; |
---|
115 | content += " <MDGroup>\n"; |
---|
116 | content += " <Location>\n"; |
---|
117 | content += " <Continent>Europe</Continent>\n"; |
---|
118 | content += " <Country>Netherlands</Country>\n"; |
---|
119 | content += " <Region/>\n"; |
---|
120 | content += " <Address>Wundtlaan 1, Nijmegen</Address>\n"; |
---|
121 | content += " </Location>\n"; |
---|
122 | content += " <Project>\n"; |
---|
123 | content += " <Name>Peter Wittenburg</Name>\n"; |
---|
124 | content += " <Title>Route description test resource</Title>\n"; |
---|
125 | content += " <Id/>\n"; |
---|
126 | content += " <Contact>\n"; |
---|
127 | content += " <Name>Peter Wittenburg</Name>\n"; |
---|
128 | content += " <Address>Wundtlaan 1, 6525 XD Nijmegen</Address>\n"; |
---|
129 | content += " <Email>peter.wittenburg@mpi.nl</Email>\n"; |
---|
130 | content += " <Organisation>Max Planck Institute for Psycholinguistics</Organisation>\n"; |
---|
131 | content += " </Contact>\n"; |
---|
132 | content += " <descriptions>\n"; |
---|
133 | content += " <Description LanguageId=\"\"/>\n"; |
---|
134 | content += " </descriptions>\n"; |
---|
135 | content += " </Project>\n"; |
---|
136 | content += " <Keys>\n"; |
---|
137 | content += " <Key Name=\"conversion.IMDI.1.9to3.0.warning\">Unknown mapping of Genre: conversation|explanation|unspecified --> ???</Key>\n"; |
---|
138 | content += " </Keys>\n"; |
---|
139 | content += " <Content>\n"; |
---|
140 | content += " <Genre>Unspecified</Genre>\n"; |
---|
141 | content += " <SubGenre>Unspecified</SubGenre>\n"; |
---|
142 | content += " <Task>route description</Task>\n"; |
---|
143 | content += " <Modalities>Speech; Gestures</Modalities>\n"; |
---|
144 | content += " <CommunicationContext>\n"; |
---|
145 | content += " <Interactivity>interactive</Interactivity>\n"; |
---|
146 | content += " <PlanningType>semi-spontaneous</PlanningType>\n"; |
---|
147 | content += " <Involvement>elicited</Involvement>\n"; |
---|
148 | content += " <SocialContext>Unspecified</SocialContext>\n"; |
---|
149 | content += " <EventStructure>Unspecified</EventStructure>\n"; |
---|
150 | content += " <Channel>Unspecified</Channel>\n"; |
---|
151 | content += " </CommunicationContext>\n"; |
---|
152 | content += " <Content_Languages>\n"; |
---|
153 | content += " <descriptions>\n"; |
---|
154 | content += " <Description LanguageId=\"\"/>\n"; |
---|
155 | content += " </descriptions>\n"; |
---|
156 | content += " <Content_Language>\n"; |
---|
157 | content += " <Id>ISO639-3:eng</Id>\n"; |
---|
158 | content += " <Name>English</Name>\n"; |
---|
159 | content += " <descriptions>\n"; |
---|
160 | content += " <Description LanguageId=\"\"/>\n"; |
---|
161 | content += " </descriptions>\n"; |
---|
162 | content += " </Content_Language>\n"; |
---|
163 | content += " </Content_Languages>\n"; |
---|
164 | content += " <Keys>\n"; |
---|
165 | content += " <Key Name=\"IMDI__1_9.Interactional\">conversation</Key>\n"; |
---|
166 | content += " <Key Name=\"IMDI__1_9.Discursive\">explanation</Key>\n"; |
---|
167 | content += " <Key Name=\"IMDI__1_9.Interactional\">Unspecified</Key>\n"; |
---|
168 | content += " </Keys>\n"; |
---|
169 | content += " <descriptions>\n"; |
---|
170 | content += " <Description LanguageId=\"ISO639:eng\">This file was generated from an IMDI 1.9 file and transformed to IMDI 3.0. The substructure of Genre is replaced by two elements named \"Genre\" and \"SubGenre\". The original content of Genre substructure was: Interactional = 'conversation', Discursive = 'explanation', Performance = 'Unspecified'. These values have been added as Keys to the Content information.</Description>\n"; |
---|
171 | content += " <Description LanguageId=\"ISO639:eng\">Peter explains how to come from Nijmegen to Kleve by car, such that Kita would be able to get there.</Description>\n"; |
---|
172 | content += " </descriptions>\n"; |
---|
173 | content += " </Content>\n"; |
---|
174 | content += " <Actors>\n"; |
---|
175 | content += " <descriptions>\n"; |
---|
176 | content += " <Description LanguageId=\"\"/>\n"; |
---|
177 | content += " </descriptions>\n"; |
---|
178 | content += " <Actor>\n"; |
---|
179 | content += " <Role>interviewee</Role>\n"; |
---|
180 | content += " <Name>Peter</Name>\n"; |
---|
181 | content += " <FullName>Peter Wittenburg</FullName>\n"; |
---|
182 | content += " <Code>W</Code>\n"; |
---|
183 | content += " <FamilySocialRole>Unspecified</FamilySocialRole>\n"; |
---|
184 | content += " <EthnicGroup/>\n"; |
---|
185 | content += " <Age>Unknown</Age>\n"; |
---|
186 | content += " <BirthDate>Unspecified</BirthDate>\n"; |
---|
187 | content += " <Sex>Unknown</Sex>\n"; |
---|
188 | content += " <Education>university</Education>\n"; |
---|
189 | content += " <Anonymized>true</Anonymized>\n"; |
---|
190 | content += " <Contact>\n"; |
---|
191 | content += " <Name/>\n"; |
---|
192 | content += " <Address/>\n"; |
---|
193 | content += " <Email/>\n"; |
---|
194 | content += " <Organisation/>\n"; |
---|
195 | content += " </Contact>\n"; |
---|
196 | content += " <Keys/>\n"; |
---|
197 | content += " <descriptions>\n"; |
---|
198 | content += " <Description LanguageId=\"\"/>\n"; |
---|
199 | content += " </descriptions>\n"; |
---|
200 | content += " <Actor_Languages>\n"; |
---|
201 | content += " <descriptions>\n"; |
---|
202 | content += " <Description LanguageId=\"\"/>\n"; |
---|
203 | content += " </descriptions>\n"; |
---|
204 | content += " <Actor_Language>\n"; |
---|
205 | content += " <Id>ISO639-3:nld</Id>\n"; |
---|
206 | content += " <Name>Dutch</Name>\n"; |
---|
207 | content += " <descriptions>\n"; |
---|
208 | content += " <Description LanguageId=\"\"/>\n"; |
---|
209 | content += " </descriptions>\n"; |
---|
210 | content += " </Actor_Language>\n"; |
---|
211 | content += " <Actor_Language>\n"; |
---|
212 | content += " <Id>ISO639-3:deu</Id>\n"; |
---|
213 | content += " <Name>German</Name>\n"; |
---|
214 | content += " <descriptions>\n"; |
---|
215 | content += " <Description LanguageId=\"\"/>\n"; |
---|
216 | content += " </descriptions>\n"; |
---|
217 | content += " </Actor_Language>\n"; |
---|
218 | content += " <Actor_Language>\n"; |
---|
219 | content += " <Id>ISO639-3:eng</Id>\n"; |
---|
220 | content += " <Name>English</Name>\n"; |
---|
221 | content += " <descriptions>\n"; |
---|
222 | content += " <Description LanguageId=\"\"/>\n"; |
---|
223 | content += " </descriptions>\n"; |
---|
224 | content += " </Actor_Language>\n"; |
---|
225 | content += " </Actor_Languages>\n"; |
---|
226 | content += " </Actor>\n"; |
---|
227 | content += " <Actor>\n"; |
---|
228 | content += " <Role>interviewer</Role>\n"; |
---|
229 | content += " <Name>Kita</Name>\n"; |
---|
230 | content += " <FullName>Sotaro Kita</FullName>\n"; |
---|
231 | content += " <Code>k</Code>\n"; |
---|
232 | content += " <FamilySocialRole>Unspecified</FamilySocialRole>\n"; |
---|
233 | content += " <EthnicGroup/>\n"; |
---|
234 | content += " <Age>Unknown</Age>\n"; |
---|
235 | content += " <BirthDate>Unspecified</BirthDate>\n"; |
---|
236 | content += " <Sex>Unknown</Sex>\n"; |
---|
237 | content += " <Education>university</Education>\n"; |
---|
238 | content += " <Anonymized>true</Anonymized>\n"; |
---|
239 | content += " <Contact>\n"; |
---|
240 | content += " <Name/>\n"; |
---|
241 | content += " <Address/>\n"; |
---|
242 | content += " <Email/>\n"; |
---|
243 | content += " <Organisation/>\n"; |
---|
244 | content += " </Contact>\n"; |
---|
245 | content += " <Keys/>\n"; |
---|
246 | content += " <descriptions>\n"; |
---|
247 | content += " <Description LanguageId=\"\"/>\n"; |
---|
248 | content += " </descriptions>\n"; |
---|
249 | content += " <Actor_Languages>\n"; |
---|
250 | content += " <descriptions>\n"; |
---|
251 | content += " <Description LanguageId=\"\"/>\n"; |
---|
252 | content += " </descriptions>\n"; |
---|
253 | content += " <Actor_Language>\n"; |
---|
254 | content += " <Id>ISO639-3:eng</Id>\n"; |
---|
255 | content += " <Name>English</Name>\n"; |
---|
256 | content += " <descriptions>\n"; |
---|
257 | content += " <Description LanguageId=\"\"/>\n"; |
---|
258 | content += " </descriptions>\n"; |
---|
259 | content += " </Actor_Language>\n"; |
---|
260 | content += " <Actor_Language>\n"; |
---|
261 | content += " <Id>ISO639-3:jpn</Id>\n"; |
---|
262 | content += " <Name>Japanese</Name>\n"; |
---|
263 | content += " <descriptions>\n"; |
---|
264 | content += " <Description LanguageId=\"\"/>\n"; |
---|
265 | content += " </descriptions>\n"; |
---|
266 | content += " </Actor_Language>\n"; |
---|
267 | content += " </Actor_Languages>\n"; |
---|
268 | content += " </Actor>\n"; |
---|
269 | content += " <Actor>\n"; |
---|
270 | content += " <Role>Collector</Role>\n"; |
---|
271 | content += " <Name>Peter Wittenburg</Name>\n"; |
---|
272 | content += " <FullName>Peter Wittenburg</FullName>\n"; |
---|
273 | content += " <Code>Unspecified</Code>\n"; |
---|
274 | content += " <FamilySocialRole>Unspecified</FamilySocialRole>\n"; |
---|
275 | content += " <EthnicGroup/>\n"; |
---|
276 | content += " <Age>Unspecified</Age>\n"; |
---|
277 | content += " <BirthDate>Unspecified</BirthDate>\n"; |
---|
278 | content += " <Sex>Unspecified</Sex>\n"; |
---|
279 | content += " <Education/>\n"; |
---|
280 | content += " <Anonymized>false</Anonymized>\n"; |
---|
281 | content += " <Contact>\n"; |
---|
282 | content += " <Name>Peter Wittenburg</Name>\n"; |
---|
283 | content += " <Address>Wundtlaan 1, 6525 XD Nijmegen</Address>\n"; |
---|
284 | content += " <Email>peter.wittenburg@mpi.nl</Email>\n"; |
---|
285 | content += " <Organisation>Max-Planck-Institute for Psycholinguistics</Organisation>\n"; |
---|
286 | content += " </Contact>\n"; |
---|
287 | content += " <Keys/>\n"; |
---|
288 | content += " <descriptions>\n"; |
---|
289 | content += " <Description LanguageId=\"\"/>\n"; |
---|
290 | content += " </descriptions>\n"; |
---|
291 | content += " <Actor_Languages/>\n"; |
---|
292 | content += " </Actor>\n"; |
---|
293 | content += " </Actors>\n"; |
---|
294 | content += " </MDGroup>\n"; |
---|
295 | content += " <Resources>\n"; |
---|
296 | content += " <MediaFile ref=\"d314e408\">\n"; |
---|
297 | content += " <ResourceLink>../Media/elan-example1.mpg</ResourceLink>\n"; |
---|
298 | content += " <Type>video</Type>\n"; |
---|
299 | content += " <Format>video/x-mpeg1</Format>\n"; |
---|
300 | content += " <Size/>\n"; |
---|
301 | content += " <Quality>Unknown</Quality>\n"; |
---|
302 | content += " <RecordingConditions>excellent</RecordingConditions>\n"; |
---|
303 | content += " <TimePosition>\n"; |
---|
304 | content += " <Start>Unknown</Start>\n"; |
---|
305 | content += " <End>Unknown</End>\n"; |
---|
306 | content += " </TimePosition>\n"; |
---|
307 | content += " <Access>\n"; |
---|
308 | content += " <Availability>openly available</Availability>\n"; |
---|
309 | content += " <Date>2003-02-12</Date>\n"; |
---|
310 | content += " <Owner>MPI for Psycholinguistics</Owner>\n"; |
---|
311 | content += " <Publisher/>\n"; |
---|
312 | content += " <Contact>\n"; |
---|
313 | content += " <Name>Romuald Skiba</Name>\n"; |
---|
314 | content += " <Address/>\n"; |
---|
315 | content += " <Email/>\n"; |
---|
316 | content += " <Organisation/>\n"; |
---|
317 | content += " </Contact>\n"; |
---|
318 | content += " <descriptions>\n"; |
---|
319 | content += " <Description LanguageId=\"\"/>\n"; |
---|
320 | content += " </descriptions>\n"; |
---|
321 | content += " </Access>\n"; |
---|
322 | content += " <descriptions>\n"; |
---|
323 | content += " <Description LanguageId=\"\"/>\n"; |
---|
324 | content += " </descriptions>\n"; |
---|
325 | content += " <Keys/>\n"; |
---|
326 | content += " </MediaFile>\n"; |
---|
327 | content += " <MediaFile ref=\"d314e471\">\n"; |
---|
328 | content += " <ResourceLink>../Media/elan-example1.mp4</ResourceLink>\n"; |
---|
329 | content += " <Type>video</Type>\n"; |
---|
330 | content += " <Format>video/mp4</Format>\n"; |
---|
331 | content += " <Size/>\n"; |
---|
332 | content += " <Quality>Unknown</Quality>\n"; |
---|
333 | content += " <RecordingConditions>excellent</RecordingConditions>\n"; |
---|
334 | content += " <TimePosition>\n"; |
---|
335 | content += " <Start>Unknown</Start>\n"; |
---|
336 | content += " <End>Unknown</End>\n"; |
---|
337 | content += " </TimePosition>\n"; |
---|
338 | content += " <Access>\n"; |
---|
339 | content += " <Availability>openly available</Availability>\n"; |
---|
340 | content += " <Date>2003-02-12</Date>\n"; |
---|
341 | content += " <Owner>MPI for Psycholinguistics</Owner>\n"; |
---|
342 | content += " <Publisher/>\n"; |
---|
343 | content += " <Contact>\n"; |
---|
344 | content += " <Name>Romuald Skiba</Name>\n"; |
---|
345 | content += " <Address/>\n"; |
---|
346 | content += " <Email/>\n"; |
---|
347 | content += " <Organisation/>\n"; |
---|
348 | content += " </Contact>\n"; |
---|
349 | content += " <descriptions>\n"; |
---|
350 | content += " <Description LanguageId=\"\"/>\n"; |
---|
351 | content += " </descriptions>\n"; |
---|
352 | content += " </Access>\n"; |
---|
353 | content += " <descriptions>\n"; |
---|
354 | content += " <Description LanguageId=\"\"/>\n"; |
---|
355 | content += " </descriptions>\n"; |
---|
356 | content += " <Keys/>\n"; |
---|
357 | content += " </MediaFile>\n"; |
---|
358 | content += " </Resources>\n"; |
---|
359 | content += " <References>\n"; |
---|
360 | content += " <descriptions>\n"; |
---|
361 | content += " <Description LanguageId=\"\"/>\n"; |
---|
362 | content += " </descriptions>\n"; |
---|
363 | content += " </References>\n"; |
---|
364 | content += " </Session>\n"; |
---|
365 | content += " </Components>\n"; |
---|
366 | content += "</CMD>\n"; |
---|
367 | File cmdiFile = createCmdiFile("testSession", content); |
---|
368 | CMDIDataProcessor processor = getDataParser(getIMDIFacetMap()); |
---|
369 | CMDIData data = processor.process(cmdiFile); |
---|
370 | assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId()); |
---|
371 | List<Resource> resources = data.getMetadataResources(); |
---|
372 | assertEquals(0, resources.size()); |
---|
373 | List<Resource> dataResources = data.getDataResources(); |
---|
374 | assertEquals(2, dataResources.size()); |
---|
375 | Resource res = dataResources.get(0); |
---|
376 | assertEquals("../Media/elan-example1.mpg", res.getResourceName()); |
---|
377 | assertEquals("video/x-mpeg1", res.getMimeType()); |
---|
378 | res = dataResources.get(1); |
---|
379 | assertEquals("../Media/elan-example1.mp3", res.getResourceName()); |
---|
380 | assertEquals("audio/mpeg", res.getMimeType()); |
---|
381 | SolrInputDocument doc = data.getSolrDocument(); |
---|
382 | assertNotNull(doc); |
---|
383 | assertEquals(8, doc.getFieldNames().size()); |
---|
384 | assertEquals("kleve-route", doc.getFieldValue("name")); |
---|
385 | assertEquals("Europe", doc.getFieldValue("continent")); |
---|
386 | assertEquals("ISO639-3:eng", doc.getFieldValue("language")); |
---|
387 | assertEquals("Netherlands", doc.getFieldValue("country")); |
---|
388 | assertEquals("Max Planck Institute for Psycholinguistics", doc.getFieldValue("organisation")); |
---|
389 | assertEquals("unspecified", doc.getFieldValue("genre")); |
---|
390 | assertEquals( |
---|
391 | "This recording was made to generate a freely available test resource including speech and gestures. The annotations were created by Peter and Kita who is gesture researcher at the MPI for Psycholinguistics.", |
---|
392 | doc.getFieldValue("description")); |
---|
393 | assertEquals("2002-10-30", doc.getFieldValue("year")); |
---|
394 | assertEquals(null, doc.getFieldValue("subject")); |
---|
395 | } |
---|
396 | |
---|
397 | @Test |
---|
398 | public void testEmptyFieldsShouldBeNull() throws Exception { |
---|
399 | String content = ""; |
---|
400 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
401 | content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; |
---|
402 | content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n"; |
---|
403 | content += " <Header>\n"; |
---|
404 | content += " <MdCreationDate>2008-05-27</MdCreationDate>\n"; |
---|
405 | content += " <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n"; |
---|
406 | content += " <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n"; |
---|
407 | content += " </Header>\n"; |
---|
408 | content += " <Resources>\n"; |
---|
409 | content += " <ResourceProxyList>\n"; |
---|
410 | content += " </ResourceProxyList>\n"; |
---|
411 | content += " <JournalFileProxyList/>\n"; |
---|
412 | content += " <ResourceRelationList/>\n"; |
---|
413 | content += " </Resources>\n"; |
---|
414 | content += " <Components>\n"; |
---|
415 | content += " <Session>\n"; |
---|
416 | content += " <Name>kleve-route</Name>\n"; |
---|
417 | content += " <Title>route description to Kleve</Title>\n"; |
---|
418 | content += " <Date></Date>\n"; |
---|
419 | content += " <descriptions>\n"; |
---|
420 | content += " <Description LanguageId=\"ISO639-2:eng\">Test.</Description>\n"; |
---|
421 | content += " </descriptions>\n"; |
---|
422 | content += " <MDGroup>\n"; |
---|
423 | content += " <Location>\n"; |
---|
424 | content += " <Continent>Europe</Continent>\n"; |
---|
425 | content += " <Country>Netherlands</Country>\n"; |
---|
426 | content += " <Region/>\n"; |
---|
427 | content += " <Address>Wundtlaan 1, Nijmegen</Address>\n"; |
---|
428 | content += " </Location>\n"; |
---|
429 | content += " <Project>\n"; |
---|
430 | content += " <Name>Peter Wittenburg</Name>\n"; |
---|
431 | content += " <Title>Route description test resource</Title>\n"; |
---|
432 | content += " <Id/>\n"; |
---|
433 | content += " <Contact>\n"; |
---|
434 | content += " <Name></Name>\n"; |
---|
435 | content += " <Address></Address>\n"; |
---|
436 | content += " <Email></Email>\n"; |
---|
437 | content += " <Organisation></Organisation>\n"; |
---|
438 | content += " </Contact>\n"; |
---|
439 | content += " <descriptions>\n"; |
---|
440 | content += " <Description LanguageId=\"\"/>\n"; |
---|
441 | content += " </descriptions>\n"; |
---|
442 | content += " </Project>\n"; |
---|
443 | content += " <Keys>\n"; |
---|
444 | content += " </Keys>\n"; |
---|
445 | content += " <Content>\n"; |
---|
446 | content += " <Genre>Unspecified</Genre>\n"; |
---|
447 | content += " <SubGenre>Unspecified</SubGenre>\n"; |
---|
448 | content += " <Task>route description</Task>\n"; |
---|
449 | content += " <Modalities>Speech; Gestures</Modalities>\n"; |
---|
450 | content += " <CommunicationContext>\n"; |
---|
451 | content += " </CommunicationContext>\n"; |
---|
452 | content += " <Content_Languages>\n"; |
---|
453 | content += " </Content_Languages>\n"; |
---|
454 | content += " <descriptions>\n"; |
---|
455 | content += " </descriptions>\n"; |
---|
456 | content += " </Content>\n"; |
---|
457 | content += " <Actors>\n"; |
---|
458 | content += " </Actors>\n"; |
---|
459 | content += " </MDGroup>\n"; |
---|
460 | content += " <Resources>\n"; |
---|
461 | content += " </Resources>\n"; |
---|
462 | content += " </Session>\n"; |
---|
463 | content += " </Components>\n"; |
---|
464 | content += "</CMD>\n"; |
---|
465 | File cmdiFile = createCmdiFile("testSession", content); |
---|
466 | CMDIDataProcessor processor = getDataParser(getIMDIFacetMap()); |
---|
467 | CMDIData data = processor.process(cmdiFile); |
---|
468 | assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId()); |
---|
469 | List<Resource> resources = data.getMetadataResources(); |
---|
470 | assertEquals(0, resources.size()); |
---|
471 | SolrInputDocument doc = data.getSolrDocument(); |
---|
472 | assertNotNull(doc); |
---|
473 | assertEquals(5, doc.getFieldNames().size()); |
---|
474 | assertEquals("kleve-route", doc.getFieldValue("name")); |
---|
475 | assertEquals("Europe", doc.getFieldValue("continent")); |
---|
476 | assertEquals("Netherlands", doc.getFieldValue("country")); |
---|
477 | assertEquals("unspecified", doc.getFieldValue("genre")); |
---|
478 | assertEquals("Test.", doc.getFieldValue("description")); |
---|
479 | assertEquals("Should be null not empty string", null, doc.getFieldValue("organisation")); |
---|
480 | assertEquals(null, doc.getFieldValue("language")); |
---|
481 | assertEquals(null, doc.getFieldValue("subject")); |
---|
482 | assertEquals(null, doc.getFieldValue("year")); |
---|
483 | } |
---|
484 | |
---|
485 | @Test |
---|
486 | public void testOlac() throws Exception { |
---|
487 | String content = ""; |
---|
488 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
489 | content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; |
---|
490 | content += " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"; |
---|
491 | content += " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\"\n"; |
---|
492 | content += " xmlns:defns=\"http://www.openarchives.org/OAI/2.0/\"\n"; |
---|
493 | content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438236/xsd\">\n"; |
---|
494 | content += " <Header>\n"; |
---|
495 | content += " <MdCreator>olac2cmdi.xsl</MdCreator>\n"; |
---|
496 | content += " <MdCreationDate>2002-12-14</MdCreationDate>\n"; |
---|
497 | content += " <MdSelfLink>oai:ailla.utexas.edu:1</MdSelfLink>\n"; |
---|
498 | content += " <MdProfile>clarin.eu:cr1:p_1271859438236</MdProfile>\n"; |
---|
499 | content += " </Header>\n"; |
---|
500 | content += " <Resources>\n"; |
---|
501 | content += " <ResourceProxyList/>\n"; |
---|
502 | content += " <JournalFileProxyList/>\n"; |
---|
503 | content += " <ResourceRelationList/>\n"; |
---|
504 | content += " </Resources>\n"; |
---|
505 | content += " <Components>\n"; |
---|
506 | content += " <OLAC-DcmiTerms>\n"; |
---|
507 | content += " <creator>Joel Sherzer (recorder)</creator>\n"; |
---|
508 | content += " <description>\n"; |
---|
509 | content += " Channel: Talking;\n"; |
---|
510 | content += " Genre: Traditional Narrative / Story;\n"; |
---|
511 | content += " Country: Panama;\n"; |
---|
512 | content += " Place of Recording: Mulatuppu;\n"; |
---|
513 | content += " Event: Community Gathering;\n"; |
---|
514 | content += " Institutional Affiliation: University of Texas at Austin;\n"; |
---|
515 | content += " Participant Information: Political Leader;\n"; |
---|
516 | content += " </description>\n"; |
---|
517 | content += " <description>The one-eyed grandmother is one of many traditional Kuna stories performed in the Kuna gathering house. This story, performed here by Pedro Arias, combines European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more Kuna in origin. All are woven together and a moral is provided. Pedro Arias performed this story before a gathered audience in the morning..\n"; |
---|
518 | content += " </description>\n"; |
---|
519 | content += " <description>Test</description>\n"; |
---|
520 | content += " <identifier>http://uts.cc.utexas.edu/~ailla/audio/sherzer/one_eyed_grandmother.ram</identifier>\n"; |
---|
521 | content += " <identifier>http://uts.cc.utexas.edu/~ailla/texts/sherzer/one_eyed_grandmother.pdf</identifier>\n"; |
---|
522 | content += " <language olac-language=\"x-sil-CHN\"/>\n"; |
---|
523 | content += " <language>Chinese</language>\n"; |
---|
524 | content += " <subject olac-linguistic-field=\"testSubject\">Kuna</subject>\n"; |
---|
525 | content += " <type olac-linguistic-type=\"Transcription\"/>\n"; |
---|
526 | content += " </OLAC-DcmiTerms>\n"; |
---|
527 | content += " </Components>\n"; |
---|
528 | content += "</CMD>\n"; |
---|
529 | |
---|
530 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
531 | CMDIDataProcessor processor = getDataParser(getOlacFacetMap()); |
---|
532 | CMDIData data = processor.process(cmdiFile); |
---|
533 | assertEquals("oai:ailla.utexas.edu:1", data.getId()); |
---|
534 | List<Resource> resources = data.getMetadataResources(); |
---|
535 | assertEquals(0, resources.size()); |
---|
536 | List<Resource> dataResources = data.getDataResources(); |
---|
537 | assertEquals(0, dataResources.size()); |
---|
538 | SolrInputDocument doc = data.getSolrDocument(); |
---|
539 | assertNotNull(doc); |
---|
540 | assertEquals(4, doc.getFieldNames().size()); |
---|
541 | assertEquals(null, doc.getFieldValue("name")); |
---|
542 | assertEquals(null, doc.getFieldValue("continent")); |
---|
543 | assertEquals(1, doc.getFieldValues("language").size()); |
---|
544 | assertEquals("x-sil-CHN", doc.getFieldValue("language")); |
---|
545 | assertEquals(null, doc.getFieldValue("country")); |
---|
546 | assertEquals(null, doc.getFieldValue("organisation")); |
---|
547 | assertEquals("transcription", doc.getFieldValue("genre")); |
---|
548 | assertEquals("testsubject", doc.getFieldValue("subject")); |
---|
549 | Collection<Object> fieldValues = doc.getFieldValues("description"); |
---|
550 | assertEquals(3, fieldValues.size()); |
---|
551 | List<String> descriptions = new ArrayList(fieldValues); |
---|
552 | Collections.sort(descriptions); |
---|
553 | assertEquals("\n Channel: Talking;\n Genre: Traditional Narrative / Story;\n Country: Panama;\n" |
---|
554 | + " Place of Recording: Mulatuppu;\n Event: Community Gathering;\n" |
---|
555 | + " Institutional Affiliation: University of Texas at Austin;\n Participant Information: Political Leader;\n" |
---|
556 | + " ", descriptions.get(0).toString()); |
---|
557 | assertEquals("Test", descriptions.get(1).toString()); |
---|
558 | assertEquals("The one-eyed grandmother is one of many traditional Kuna stories performed " |
---|
559 | + "in the Kuna gathering house. This story, performed here by Pedro Arias, combines " |
---|
560 | + "European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more " |
---|
561 | + "Kuna in origin. All are woven together and a moral is provided. Pedro Arias performed " |
---|
562 | + "this story before a gathered audience in the morning..\n ", descriptions.get(2).toString()); |
---|
563 | } |
---|
564 | |
---|
565 | @Test |
---|
566 | public void testOlacMultiFacets() throws Exception { |
---|
567 | String content = ""; |
---|
568 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
569 | content += "<CMD>\n"; |
---|
570 | content += " <Components>\n"; |
---|
571 | content += " <OLAC-DcmiTerms>\n"; |
---|
572 | content += " <subject olac-linguistic-field=\"testSubject\">Kuna</subject>\n"; |
---|
573 | content += " <subject dcterms-type=\"LCSH\">testSubjectFallback</subject>\n"; |
---|
574 | content += " <spatial dcterms-type=\"ISO3166\">testCountry1</spatial>\n"; |
---|
575 | content += " <coverage dcterms-type=\"ISO3166\">testCountry2</coverage>\n"; |
---|
576 | content += " <language olac-language=\"language1\">test1</language>\n"; |
---|
577 | content += " <subject olac-language=\"language2\">test2</subject>\n"; |
---|
578 | content += " </OLAC-DcmiTerms>\n"; |
---|
579 | content += " </Components>\n"; |
---|
580 | content += "</CMD>\n"; |
---|
581 | |
---|
582 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
583 | CMDIDataProcessor processor = getDataParser(getOlacFacetMap()); |
---|
584 | CMDIData data = processor.process(cmdiFile); |
---|
585 | SolrInputDocument doc = data.getSolrDocument(); |
---|
586 | assertEquals(1, doc.getFieldValues("subject").size()); |
---|
587 | assertEquals("testsubject", doc.getFieldValue("subject")); |
---|
588 | assertEquals(1, doc.getFieldValues("country").size()); |
---|
589 | assertEquals("testCountry1", doc.getFieldValue("country")); |
---|
590 | assertEquals(1, doc.getFieldValues("language").size()); |
---|
591 | assertEquals("language1", doc.getFieldValue("language")); |
---|
592 | |
---|
593 | content = ""; |
---|
594 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
595 | content += "<CMD>\n"; |
---|
596 | content += " <Components>\n"; |
---|
597 | content += " <OLAC-DcmiTerms>\n"; |
---|
598 | content += " <subject dcterms-type=\"LCSH\">testSubjectFallback</subject>\n"; |
---|
599 | content += " <coverage dcterms-type=\"ISO3166\">testCountry2</coverage>\n"; |
---|
600 | content += " <subject olac-language=\"language2\">test2</subject>\n"; |
---|
601 | content += " </OLAC-DcmiTerms>\n"; |
---|
602 | content += " </Components>\n"; |
---|
603 | content += "</CMD>\n"; |
---|
604 | |
---|
605 | cmdiFile = createCmdiFile("testOlac", content); |
---|
606 | processor = getDataParser(getOlacFacetMap()); |
---|
607 | data = processor.process(cmdiFile); |
---|
608 | doc = data.getSolrDocument(); |
---|
609 | assertEquals(1, doc.getFieldValues("subject").size()); |
---|
610 | assertEquals("testsubjectfallback", doc.getFieldValue("subject")); |
---|
611 | assertEquals(1, doc.getFieldValues("country").size()); |
---|
612 | assertEquals("testCountry2", doc.getFieldValue("country")); |
---|
613 | assertEquals(1, doc.getFieldValues("language").size()); |
---|
614 | assertEquals("language2", doc.getFieldValue("language")); |
---|
615 | |
---|
616 | content = ""; |
---|
617 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
618 | content += "<CMD>\n"; |
---|
619 | content += " <Components>\n"; |
---|
620 | content += " <OLAC-DcmiTerms>\n"; |
---|
621 | content += " <subject dcterms-type=\"LCSH\">testSubjectFallback</subject>\n"; |
---|
622 | content += " <subject olac-linguistic-field=\"testSubject\">Kuna</subject>\n"; |
---|
623 | content += " <coverage dcterms-type=\"ISO3166\">testCountry2</coverage>\n"; |
---|
624 | content += " <spatial dcterms-type=\"ISO3166\">testCountry1</spatial>\n"; |
---|
625 | content += " <subject olac-language=\"language2\">test2</subject>\n"; |
---|
626 | content += " <language olac-language=\"language1\">test1</language>\n"; |
---|
627 | content += " </OLAC-DcmiTerms>\n"; |
---|
628 | content += " </Components>\n"; |
---|
629 | content += "</CMD>\n"; |
---|
630 | |
---|
631 | cmdiFile = createCmdiFile("testOlac", content); |
---|
632 | processor = getDataParser(getOlacFacetMap()); |
---|
633 | data = processor.process(cmdiFile); |
---|
634 | doc = data.getSolrDocument(); |
---|
635 | assertEquals(1, doc.getFieldValues("subject").size()); |
---|
636 | assertEquals("testsubject", doc.getFieldValue("subject")); |
---|
637 | assertEquals(1, doc.getFieldValues("country").size()); |
---|
638 | assertEquals("testCountry1", doc.getFieldValue("country")); |
---|
639 | assertEquals(1, doc.getFieldValues("language").size()); |
---|
640 | assertEquals("language1", doc.getFieldValue("language")); |
---|
641 | } |
---|
642 | |
---|
643 | @Test |
---|
644 | public void testIgnoreWhiteSpaceFacets() throws Exception { |
---|
645 | String content = ""; |
---|
646 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
647 | content += "<CMD>\n"; |
---|
648 | content += " <Components>\n"; |
---|
649 | content += " <OLAC-DcmiTerms>\n"; |
---|
650 | content += " <subject olac-linguistic-field=\"\n\n\t\t\t\">Kuna</subject>\n"; |
---|
651 | content += " </OLAC-DcmiTerms>\n"; |
---|
652 | content += " </Components>\n"; |
---|
653 | content += "</CMD>\n"; |
---|
654 | |
---|
655 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
656 | CMDIDataProcessor processor = getDataParser(getOlacFacetMap()); |
---|
657 | CMDIData data = processor.process(cmdiFile); |
---|
658 | SolrInputDocument doc = data.getSolrDocument(); |
---|
659 | assertEquals(null, doc.getFieldValues("subject")); |
---|
660 | } |
---|
661 | |
---|
662 | @Test |
---|
663 | public void testOlacCollection() throws Exception { |
---|
664 | String content = ""; |
---|
665 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
666 | content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; |
---|
667 | content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438236/xsd\">\n"; |
---|
668 | content += " <Header>\n"; |
---|
669 | content += " <MdCreator>dir2cmdicollection.py</MdCreator>\n"; |
---|
670 | content += " <MdCreationDate>2010-10-11</MdCreationDate>\n"; |
---|
671 | content += " <MdSelfLink>collection_ATILF_Resources.cmdi</MdSelfLink>\n"; |
---|
672 | content += " <MdProfile>clarin.eu:cr1:p_1271859438236</MdProfile>\n"; |
---|
673 | content += " </Header>\n"; |
---|
674 | content += " <Resources>\n"; |
---|
675 | content += " <ResourceProxyList>\n"; |
---|
676 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0001.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0001.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
677 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0002.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0002.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
678 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0003.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0003.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
679 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0004.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0004.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
680 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_a.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_a.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
681 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_b.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_b.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
682 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0006.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0006.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
683 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M277.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M277.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
684 | content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M592.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M592.xml.cmdi</ResourceRef></ResourceProxy>\n"; |
---|
685 | content += " </ResourceProxyList>\n"; |
---|
686 | content += " <JournalFileProxyList/>\n"; |
---|
687 | content += " <ResourceRelationList/>\n"; |
---|
688 | content += " </Resources>\n"; |
---|
689 | content += " <Components>\n"; |
---|
690 | content += " <olac></olac>\n"; |
---|
691 | content += " </Components>\n"; |
---|
692 | content += "</CMD>\n"; |
---|
693 | |
---|
694 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
695 | CMDIDataProcessor processor = getDataParser(getOlacFacetMap()); |
---|
696 | CMDIData data = processor.process(cmdiFile); |
---|
697 | assertEquals("collection_ATILF_Resources.cmdi", data.getId()); |
---|
698 | List<Resource> resources = data.getMetadataResources(); |
---|
699 | assertEquals(9, resources.size()); |
---|
700 | Resource res = resources.get(0); |
---|
701 | assertEquals("ATILF_Resources/0/oai_atilf_inalf_fr_0001.xml.cmdi", res.getResourceName()); |
---|
702 | assertEquals(null, res.getMimeType()); |
---|
703 | assertEquals(0, data.getDataResources().size()); |
---|
704 | SolrInputDocument doc = data.getSolrDocument(); |
---|
705 | assertNull(doc); |
---|
706 | List<Resource> dataResources = data.getDataResources(); |
---|
707 | assertEquals(0, dataResources.size()); |
---|
708 | } |
---|
709 | |
---|
710 | @Test |
---|
711 | public void testLrtCollection() throws Exception { |
---|
712 | String content = ""; |
---|
713 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
714 | content += "<CMD ns0:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1289827960126/xsd\" xmlns:ns0=\"http://www.w3.org/2001/XMLSchema-instance\">\n"; |
---|
715 | content += " <Header>\n"; |
---|
716 | content += " <MdCreator>lrt2cmdi.py</MdCreator>\n"; |
---|
717 | content += " <MdCreationDate>2010-11-17</MdCreationDate>\n"; |
---|
718 | content += " <MdSelfLink>clarin.eu:lrt:433</MdSelfLink>\n"; |
---|
719 | content += " <MdProfile>clarin.eu:cr1:p_1289827960126</MdProfile>\n"; |
---|
720 | content += " </Header>\n"; |
---|
721 | content += " <Resources>\n"; |
---|
722 | content += " <ResourceProxyList />\n"; |
---|
723 | content += " <JournalFileProxyList />\n"; |
---|
724 | content += " <ResourceRelationList />\n"; |
---|
725 | content += " </Resources>\n"; |
---|
726 | content += " <Components>\n"; |
---|
727 | content += " <LrtInventoryResource>\n"; |
---|
728 | content += " <LrtCommon>\n"; |
---|
729 | content += " <ResourceName>Corpus of Present-day Written Estonian</ResourceName>\n"; |
---|
730 | content += " <ResourceType>Written Corpus</ResourceType>\n"; |
---|
731 | content += " <LanguagesOther />\n"; |
---|
732 | content += " <Description>written general; 95 mio words; TEI/SGML</Description>\n"; |
---|
733 | content += " <ContactPerson>Kadri.Muischnek@ut.ee</ContactPerson>\n"; |
---|
734 | content += " <Format />\n"; |
---|
735 | content += " <Institute>Test</Institute>\n"; |
---|
736 | content += " <MetadataLink />\n"; |
---|
737 | content += " <Publications />\n"; |
---|
738 | content += " <ReadilyAvailable>true</ReadilyAvailable>\n"; |
---|
739 | content += " <ReferenceLink /> \n"; |
---|
740 | content += " <Languages><ISO639><iso-639-3-code>est</iso-639-3-code></ISO639></Languages>\n"; |
---|
741 | content += " <Countries><Country><Code>EE</Code></Country></Countries>\n"; |
---|
742 | content += " </LrtCommon>\n"; |
---|
743 | content += " </LrtInventoryResource>\n"; |
---|
744 | content += " </Components>\n"; |
---|
745 | content += "</CMD>\n"; |
---|
746 | |
---|
747 | File cmdiFile = createCmdiFile("testOlac", content); |
---|
748 | CMDIDataProcessor processor = getDataParser(getLrtFacetMap()); |
---|
749 | CMDIData data = processor.process(cmdiFile); |
---|
750 | assertEquals("clarin.eu:lrt:433", data.getId()); |
---|
751 | List<Resource> resources = data.getMetadataResources(); |
---|
752 | assertEquals(0, resources.size()); |
---|
753 | List<Resource> dataResources = data.getDataResources(); |
---|
754 | assertEquals(0, dataResources.size()); |
---|
755 | SolrInputDocument doc = data.getSolrDocument(); |
---|
756 | assertNotNull(doc); |
---|
757 | assertEquals(5, doc.getFieldNames().size()); |
---|
758 | assertEquals("Corpus of Present-day Written Estonian", doc.getFieldValue("name")); |
---|
759 | assertEquals(null, doc.getFieldValue("continent")); |
---|
760 | assertEquals(1, doc.getFieldValues("language").size()); |
---|
761 | assertEquals("est", doc.getFieldValue("language")); |
---|
762 | assertEquals("EE", doc.getFieldValue("country")); |
---|
763 | assertEquals("Test", doc.getFieldValue("organisation")); |
---|
764 | assertEquals(null, doc.getFieldValue("year")); |
---|
765 | assertEquals(null, doc.getFieldValue("genre")); |
---|
766 | assertEquals("written general; 95 mio words; TEI/SGML", doc.getFieldValue("description")); |
---|
767 | } |
---|
768 | |
---|
769 | } |
---|