1 | package eu.clarin.cmdi.vlo.importer; |
---|
2 | |
---|
3 | import eu.clarin.cmdi.vlo.FacetConstants; |
---|
4 | import eu.clarin.cmdi.vlo.config.DataRoot; |
---|
5 | import eu.clarin.cmdi.vlo.config.VloConfig; |
---|
6 | import java.io.File; |
---|
7 | import java.io.IOException; |
---|
8 | import java.net.MalformedURLException; |
---|
9 | import java.util.ArrayList; |
---|
10 | import java.util.Collection; |
---|
11 | import java.util.Collections; |
---|
12 | import java.util.List; |
---|
13 | import org.apache.solr.client.solrj.SolrServerException; |
---|
14 | import org.apache.solr.common.SolrInputDocument; |
---|
15 | import static org.junit.Assert.assertEquals; |
---|
16 | import org.junit.Test; |
---|
17 | |
---|
18 | public class MetadataImporterTest extends ImporterTestcase { |
---|
19 | |
---|
20 | @Test |
---|
21 | public void testImporterSimple() throws Exception { |
---|
22 | String session = ""; |
---|
23 | session += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
24 | session += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
25 | session += " <Header>\n"; |
---|
26 | session += " <MdCreationDate>2008-05-27</MdCreationDate>\n"; |
---|
27 | session += " <MdSelfLink> testID1Session</MdSelfLink>\n"; |
---|
28 | session += " <MdCollectionDisplayName>CollectionName</MdCollectionDisplayName>\n"; |
---|
29 | session += " <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n"; |
---|
30 | session += " </Header>\n"; |
---|
31 | session += " <Resources>\n"; |
---|
32 | session += " <ResourceProxyList>\n"; |
---|
33 | session += " <ResourceProxy id=\"d314e408\">\n"; |
---|
34 | session += " <ResourceType mimetype=\"video/x-mpeg1\" >Resource</ResourceType>\n"; |
---|
35 | session += " <ResourceRef>../Media/elan-example1.mpg</ResourceRef>\n"; |
---|
36 | session += " </ResourceProxy>\n"; |
---|
37 | session += " </ResourceProxyList>\n"; |
---|
38 | session += " </Resources>\n"; |
---|
39 | session += " <Components>\n"; |
---|
40 | session += " <Session>\n"; |
---|
41 | session += " <Name>kleve-route</Name>\n"; |
---|
42 | session += " <Title>kleve-route-title</Title>\n"; |
---|
43 | session += " </Session>\n"; |
---|
44 | session += " </Components>\n"; |
---|
45 | session += "</CMD>\n"; |
---|
46 | File sessionFile = createCmdiFile("testSession", session); |
---|
47 | |
---|
48 | String content = ""; |
---|
49 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
50 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1274880881885/xsd\">\n"; |
---|
51 | content += " <Header>\n"; |
---|
52 | content += " <MdSelfLink>testID2</MdSelfLink>\n"; |
---|
53 | content += " </Header>\n"; |
---|
54 | content += " <Resources>\n"; |
---|
55 | content += " <ResourceProxyList>\n"; |
---|
56 | content += " <ResourceProxy id=\"d28635e19\">\n"; |
---|
57 | content += " <ResourceType>Metadata</ResourceType>\n"; |
---|
58 | content += " <ResourceRef>" + sessionFile.getName() + "</ResourceRef>\n"; |
---|
59 | content += " </ResourceProxy>\n"; |
---|
60 | content += " </ResourceProxyList>\n"; |
---|
61 | content += " </Resources>\n"; |
---|
62 | content += " <Components>\n"; |
---|
63 | content += " <imdi-corpus>\n"; |
---|
64 | content += " <Corpus>\n"; |
---|
65 | content += " <Name>MPI corpora</Name>\n"; |
---|
66 | content += " </Corpus>\n"; |
---|
67 | content += " </imdi-corpus>\n"; |
---|
68 | content += " </Components>\n"; |
---|
69 | content += "</CMD>\n"; |
---|
70 | File rootFile = createCmdiFile("rootFile", content); |
---|
71 | |
---|
72 | List<SolrInputDocument> docs = importData(rootFile.getParentFile()); |
---|
73 | assertEquals(1, docs.size()); |
---|
74 | SolrInputDocument doc = docs.get(0); |
---|
75 | assertEquals("testID1Session", getValue(doc, FacetConstants.FIELD_ID)); |
---|
76 | assertEquals("CollectionName", getValue(doc, FacetConstants.FIELD_COLLECTION)); |
---|
77 | assertEquals("testRoot", getValue(doc, FacetConstants.FIELD_DATA_PROVIDER)); |
---|
78 | assertEquals("kleve-route", getValue(doc, FacetConstants.FIELD_NAME)); |
---|
79 | assertEquals(sessionFile.getAbsolutePath(), getValue(doc, FacetConstants.FIELD_FILENAME)); |
---|
80 | assertEquals("video", getValue(doc, FacetConstants.FIELD_RESOURCE_TYPE)); |
---|
81 | assertEquals("video/x-mpeg1|../Media/elan-example1.mpg", getValue(doc, FacetConstants.FIELD_RESOURCE)); |
---|
82 | } |
---|
83 | |
---|
84 | @Test |
---|
85 | public void testImportWithMimeTypeOverride() throws Exception { |
---|
86 | String content = ""; |
---|
87 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
88 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
89 | content += " <Header>\n"; |
---|
90 | content += " <MdSelfLink>testID2</MdSelfLink>\n"; |
---|
91 | content += " <MdProfile>clarin.eu:cr1:p_1289827960126</MdProfile>\n"; |
---|
92 | content += " </Header>\n"; |
---|
93 | content += " <Resources>\n"; |
---|
94 | content += " <ResourceProxyList>\n"; |
---|
95 | content += " <ResourceProxy id=\"refLink\">\n"; |
---|
96 | content += " <ResourceType>Resource</ResourceType>\n"; |
---|
97 | content += " <ResourceRef>http://terminotica.upf.es/CREL/LIC01.htm</ResourceRef>\n"; |
---|
98 | content += " </ResourceProxy>\n"; |
---|
99 | content += " <ResourceProxy id=\"refLink2\">\n"; |
---|
100 | content += " <ResourceType>Resource</ResourceType>\n"; |
---|
101 | content += " <ResourceRef>file://bla.resource2.txt</ResourceRef>\n"; |
---|
102 | content += " </ResourceProxy>\n"; |
---|
103 | content += " </ResourceProxyList>\n"; |
---|
104 | content += " </Resources>\n"; |
---|
105 | content += " <Components>\n"; |
---|
106 | content += " <LrtInventoryResource>\n"; |
---|
107 | content += " <LrtCommon>\n"; |
---|
108 | content += " <ResourceName>PALIC</ResourceName>\n"; |
---|
109 | content += " <ResourceType>Application / Tool</ResourceType>\n"; |
---|
110 | content += " <ResourceType>Text</ResourceType>\n"; |
---|
111 | content += " </LrtCommon>\n"; |
---|
112 | content += " </LrtInventoryResource>\n"; |
---|
113 | content += " </Components>\n"; |
---|
114 | content += "</CMD>\n"; |
---|
115 | File rootFile = createCmdiFile("rootFile", content); |
---|
116 | |
---|
117 | List<SolrInputDocument> docs = importData(rootFile); |
---|
118 | assertEquals(1, docs.size()); |
---|
119 | SolrInputDocument doc = docs.get(0); |
---|
120 | assertEquals("PALIC", getValue(doc, FacetConstants.FIELD_NAME)); |
---|
121 | Collection<Object> fieldValues = doc.getFieldValues(FacetConstants.FIELD_RESOURCE_TYPE); |
---|
122 | assertEquals(2, fieldValues.size()); |
---|
123 | List<String> values = new ArrayList(fieldValues); |
---|
124 | Collections.sort(values); |
---|
125 | assertEquals("Application / Tool", values.get(0)); |
---|
126 | assertEquals("text", values.get(1)); |
---|
127 | fieldValues = doc.getFieldValues(FacetConstants.FIELD_RESOURCE); |
---|
128 | assertEquals(2, fieldValues.size()); |
---|
129 | values = new ArrayList(fieldValues); |
---|
130 | Collections.sort(values); |
---|
131 | assertEquals("text|file://bla.resource2.txt", values.get(0)); |
---|
132 | assertEquals("unknown type|http://terminotica.upf.es/CREL/LIC01.htm", values.get(1)); |
---|
133 | } |
---|
134 | |
---|
135 | @Test |
---|
136 | public void testImportWithNameSpaceGalore() throws Exception { |
---|
137 | String content = ""; |
---|
138 | content += "<cmdi:CMD CMDVersion=\"1.1\" xmlns:cmdi=\"http://www.clarin.eu/cmd/\"\n"; |
---|
139 | content += " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.clarin.eu/cmd/ http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1290431694629/xsd\">\n"; |
---|
140 | content += " <cmdi:Header/>\n"; |
---|
141 | content += " <cmdi:Resources>\n"; |
---|
142 | content += " <cmdi:ResourceProxyList>\n"; |
---|
143 | content += " <cmdi:ResourceProxy id=\"TEI\">\n"; |
---|
144 | content += " <cmdi:ResourceType>Resource</cmdi:ResourceType>\n"; |
---|
145 | content += " <cmdi:ResourceRef>http://hdl.handle.net/11858/00-175C-0000-0000-E180-8?urlappend=/TEI</cmdi:ResourceRef>\n"; |
---|
146 | content += " </cmdi:ResourceProxy>\n"; |
---|
147 | content += " </cmdi:ResourceProxyList>\n"; |
---|
148 | content += " <cmdi:JournalFileProxyList/>\n"; |
---|
149 | content += " <cmdi:ResourceRelationList/>\n"; |
---|
150 | content += " </cmdi:Resources>\n"; |
---|
151 | content += " <cmdi:Components>\n"; |
---|
152 | content += " <cmdi:EastRepublican ref=\"TEI\">\n"; |
---|
153 | content += " <cmdi:GeneralInformation>\n"; |
---|
154 | content += " <cmdi:Identifier>hdl:11858/00-175C-0000-0000-E180-8</cmdi:Identifier>\n"; |
---|
155 | content += " <cmdi:Title>L'Est R\u00e9publicain : \u00e9dition du 17 mai 1999</cmdi:Title>\n"; |
---|
156 | content += " </cmdi:GeneralInformation>\n"; |
---|
157 | content += " </cmdi:EastRepublican>\n"; |
---|
158 | content += " </cmdi:Components>\n"; |
---|
159 | content += "</cmdi:CMD>\n"; |
---|
160 | |
---|
161 | File rootFile = createCmdiFile("rootFile", content); |
---|
162 | |
---|
163 | List<SolrInputDocument> docs = importData(rootFile); |
---|
164 | assertEquals(1, docs.size()); |
---|
165 | SolrInputDocument doc = docs.get(0); |
---|
166 | assertEquals("hdl:11858/00-175C-0000-0000-E180-8", getValue(doc, FacetConstants.FIELD_ID)); |
---|
167 | assertEquals("L'Est R\u00e9publicain : \u00e9dition du 17 mai 1999", getValue(doc, FacetConstants.FIELD_NAME)); |
---|
168 | assertEquals("unknown type|http://hdl.handle.net/11858/00-175C-0000-0000-E180-8?urlappend=/TEI", getValue(doc, |
---|
169 | FacetConstants.FIELD_RESOURCE)); |
---|
170 | } |
---|
171 | |
---|
172 | @Test |
---|
173 | public void testNoIdTakeFileName() throws Exception { |
---|
174 | String session = ""; |
---|
175 | session += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
176 | session += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
177 | session += " <Header>\n"; |
---|
178 | session += " <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n"; |
---|
179 | session += " </Header>\n"; |
---|
180 | session += " <Resources>\n"; |
---|
181 | session += " </Resources>\n"; |
---|
182 | session += " <Components>\n"; |
---|
183 | session += " <Session>\n"; |
---|
184 | session += " <Name>kleve-route</Name>\n"; |
---|
185 | session += " <Title>kleve-route-title</Title>\n"; |
---|
186 | session += " </Session>\n"; |
---|
187 | session += " </Components>\n"; |
---|
188 | session += "</CMD>\n"; |
---|
189 | File sessionFile = createCmdiFile("testSession", session); |
---|
190 | |
---|
191 | List<SolrInputDocument> docs = importData(sessionFile); |
---|
192 | assertEquals(1, docs.size()); |
---|
193 | SolrInputDocument doc = docs.get(0); |
---|
194 | assertEquals("testRoot/" + sessionFile.getName(), getValue(doc, FacetConstants.FIELD_ID)); |
---|
195 | } |
---|
196 | |
---|
197 | @Test |
---|
198 | public void testProjectName() throws Exception { |
---|
199 | String content = ""; |
---|
200 | content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; |
---|
201 | content += "<CMD xmlns=\"http://www.clarin.eu/cmd/\">\n"; |
---|
202 | content += " <Header>\n"; |
---|
203 | content += " <MdProfile>clarin.eu:cr1:p_1280305685235</MdProfile>\n"; |
---|
204 | content += " </Header>\n"; |
---|
205 | content += " <Resources>\n"; |
---|
206 | content += " </Resources>\n"; |
---|
207 | content += " <Components>\n"; |
---|
208 | content += " <DynaSAND>\n"; |
---|
209 | content += " <Collection>\n"; |
---|
210 | content += " <GeneralInfo>\n"; |
---|
211 | content += " <Name>DiDDD</Name>\n"; |
---|
212 | content += " <ID>id1234</ID>\n"; |
---|
213 | content += " </GeneralInfo>\n"; |
---|
214 | content += " <Project>\n"; |
---|
215 | content += " <Name>DiDDD-project</Name>\n"; |
---|
216 | content += " </Project>\n"; |
---|
217 | content += " </Collection>\n"; |
---|
218 | content += " </DynaSAND>\n"; |
---|
219 | content += " </Components>\n"; |
---|
220 | content += "</CMD>\n"; |
---|
221 | File sessionFile = createCmdiFile("testSession", content); |
---|
222 | |
---|
223 | List<SolrInputDocument> docs = importData(sessionFile); |
---|
224 | assertEquals(1, docs.size()); |
---|
225 | SolrInputDocument doc = docs.get(0); |
---|
226 | assertEquals("testRoot", getValue(doc, FacetConstants.FIELD_COLLECTION)); |
---|
227 | assertEquals("DiDDD-project", getValue(doc, FacetConstants.FIELD_PROJECT_NAME)); |
---|
228 | } |
---|
229 | |
---|
230 | private Object getValue(SolrInputDocument doc, String field) { |
---|
231 | assertEquals(1, doc.getFieldValues(field).size()); |
---|
232 | return doc.getFieldValue(field); |
---|
233 | } |
---|
234 | |
---|
235 | private List<SolrInputDocument> importData(File rootFile) throws MalformedURLException { |
---|
236 | final List<SolrInputDocument> result = new ArrayList<SolrInputDocument>(); |
---|
237 | |
---|
238 | String fileName = VloConfig.class.getResource("/VloConfig.xml").getFile(); |
---|
239 | |
---|
240 | VloConfig testConfig; |
---|
241 | |
---|
242 | /** |
---|
243 | * Problem: at the moment the readTestConfig method is not prepared for |
---|
244 | * a message from the importer. May rename readTestConfig to |
---|
245 | * readWebAppTestConfig, so that we have space for a |
---|
246 | * readImporterTestConfig also. |
---|
247 | * |
---|
248 | * The best thing would be to have a directory for testing inside the |
---|
249 | * package. You can always use an external configuration file for more |
---|
250 | * elaborate testing.test directory inside the package. |
---|
251 | */ |
---|
252 | |
---|
253 | testConfig = VloConfig.readTestConfig(fileName); |
---|
254 | |
---|
255 | // modify the test configuration a bit |
---|
256 | |
---|
257 | testConfig = modifyConfig(testConfig, rootFile); |
---|
258 | |
---|
259 | // ... and also the importer itself |
---|
260 | |
---|
261 | MetadataImporter importer; |
---|
262 | importer = new MetadataImporter(testConfig) { |
---|
263 | @Override |
---|
264 | protected void initSolrServer() throws MalformedURLException { |
---|
265 | //do nothing no solrserver in test |
---|
266 | } |
---|
267 | |
---|
268 | @Override |
---|
269 | protected void sendDocs() throws SolrServerException, IOException { |
---|
270 | //overriding here so we can test the docs |
---|
271 | result.addAll(this.docs); |
---|
272 | docs = new ArrayList<SolrInputDocument>(); |
---|
273 | } |
---|
274 | }; |
---|
275 | importer.startImport(); |
---|
276 | return result; |
---|
277 | } |
---|
278 | |
---|
279 | private VloConfig modifyConfig(VloConfig config, File rootFile) { |
---|
280 | DataRoot dataRoot = new DataRoot(); |
---|
281 | dataRoot.setDeleteFirst(false);//cannot delete not using real solrServer |
---|
282 | dataRoot.setOriginName("testRoot"); |
---|
283 | dataRoot.setRootFile(rootFile); |
---|
284 | dataRoot.setTostrip(""); |
---|
285 | dataRoot.setPrefix("http://example.com"); |
---|
286 | config.setDataRoots(Collections.singletonList(dataRoot)); |
---|
287 | return config; |
---|
288 | } |
---|
289 | |
---|
290 | } |
---|