/**
 * This software is copyright (c) 2012-2014 by
 *  - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
 * This is free software. You can redistribute it
 * and/or modify it under the terms described in
 * the GNU General Public License v3 of which you
 * should have received a copy. Otherwise you can download
 * it from
 *
 *   http://www.gnu.org/licenses/gpl-3.0.txt
 *
 * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
 *
 * @license http://www.gnu.org/licenses/gpl-3.0.txt
 *  GNU General Public License v3
 */
17 | package eu.clarin.sru.client.fcs; |
---|
18 | |
---|
19 | import java.net.URI; |
---|
20 | import java.net.URISyntaxException; |
---|
21 | import java.util.ArrayList; |
---|
22 | import java.util.HashMap; |
---|
23 | import java.util.List; |
---|
24 | import java.util.Map; |
---|
25 | |
---|
26 | import javax.xml.XMLConstants; |
---|
27 | import javax.xml.namespace.QName; |
---|
28 | import javax.xml.stream.XMLStreamException; |
---|
29 | import javax.xml.stream.XMLStreamReader; |
---|
30 | |
---|
31 | import org.slf4j.Logger; |
---|
32 | import org.slf4j.LoggerFactory; |
---|
33 | |
---|
34 | import eu.clarin.sru.client.SRUClientException; |
---|
35 | import eu.clarin.sru.client.SRUExtraResponseData; |
---|
36 | import eu.clarin.sru.client.SRUExtraResponseDataParser; |
---|
37 | import eu.clarin.sru.client.XmlStreamReaderUtils; |
---|
38 | import eu.clarin.sru.client.fcs.ClarinFCSEndpointDescription.DataView; |
---|
39 | import eu.clarin.sru.client.fcs.ClarinFCSEndpointDescription.DataView.DeliveryPolicy; |
---|
40 | import eu.clarin.sru.client.fcs.ClarinFCSEndpointDescription.ResourceInfo; |
---|
41 | |
---|
42 | |
---|
43 | /** |
---|
44 | * An extra response data parser for parsing CLARIN-FCS endpoint descriptions. |
---|
45 | */ |
---|
46 | public class ClarinFCSEndpointDescriptionParser implements |
---|
47 | SRUExtraResponseDataParser { |
---|
48 | /** |
---|
49 | * constant for infinite resource enumeration parsing depth |
---|
50 | */ |
---|
51 | public static final int INFINITE_MAX_DEPTH = -1; |
---|
52 | /** |
---|
53 | * constant for default parsing resource enumeration parsing depth |
---|
54 | */ |
---|
55 | public static final int DEFAULT_MAX_DEPTH = INFINITE_MAX_DEPTH; |
---|
56 | private static final Logger logger = |
---|
57 | LoggerFactory.getLogger(ClarinFCSClientBuilder.class); |
---|
58 | private static final String ED_NS_URI = |
---|
59 | "http://clarin.eu/fcs/endpoint-description"; |
---|
60 | private static final QName ED_ROOT_ELEMENT = |
---|
61 | new QName(ED_NS_URI, "EndpointDescription"); |
---|
62 | private static final int EXPECTED_VERSION = 1; |
---|
63 | private static final String CAPABILITY_PREFIX = |
---|
64 | "http://clarin.eu/fcs/capability/"; |
---|
65 | private static final URI CAPABILITY_BASIC_SEARCH = |
---|
66 | URI.create("http://clarin.eu/fcs/capability/basic-search"); |
---|
67 | private static final String MIMETYPE_HITS_DATAVIEW = |
---|
68 | "application/x-clarin-fcs-hits+xml"; |
---|
69 | private final int maxDepth; |
---|
70 | |
---|
71 | |
---|
72 | /** |
---|
73 | * Constructor. By default, the parser will parse the endpoint resource |
---|
74 | * enumeration to an infinite depth. |
---|
75 | */ |
---|
76 | public ClarinFCSEndpointDescriptionParser() { |
---|
77 | this(DEFAULT_MAX_DEPTH); |
---|
78 | } |
---|
79 | |
---|
80 | |
---|
81 | /** |
---|
82 | * Constructor. |
---|
83 | * |
---|
84 | * @param maxDepth |
---|
85 | * maximum depth for parsing the endpoint resource enumeration. |
---|
86 | * @throws IllegalArgumentException |
---|
87 | * if an argument is illegal |
---|
88 | */ |
---|
89 | public ClarinFCSEndpointDescriptionParser(int maxDepth) { |
---|
90 | if (maxDepth < -1) { |
---|
91 | throw new IllegalArgumentException("maxDepth < -1"); |
---|
92 | } |
---|
93 | this.maxDepth = maxDepth; |
---|
94 | } |
---|
95 | |
---|
96 | |
---|
97 | @Override |
---|
98 | public boolean supports(QName name) { |
---|
99 | return ED_ROOT_ELEMENT.equals(name); |
---|
100 | } |
---|
101 | |
---|
102 | |
---|
103 | @Override |
---|
104 | public SRUExtraResponseData parse(XMLStreamReader reader) |
---|
105 | throws XMLStreamException, SRUClientException { |
---|
106 | final int version = parseVersion(reader); |
---|
107 | if (version != EXPECTED_VERSION) { |
---|
108 | throw new SRUClientException("Attribute 'version' of " + |
---|
109 | "element '<EndpointDescription>' must be of value '1'"); |
---|
110 | } |
---|
111 | logger.error("VERSION: {}", version); |
---|
112 | reader.next(); // consume start tag |
---|
113 | |
---|
114 | // Capabilities |
---|
115 | List<URI> capabilities = null; |
---|
116 | XmlStreamReaderUtils.readStart(reader, ED_NS_URI, "Capabilities", true); |
---|
117 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
118 | "Capability", (capabilities == null))) { |
---|
119 | final String s = XmlStreamReaderUtils.readString(reader, true); |
---|
120 | try { |
---|
121 | if (!s.startsWith(CAPABILITY_PREFIX)) { |
---|
122 | throw new XMLStreamException("Capabilites must start " + |
---|
123 | "with prefix '" + CAPABILITY_PREFIX + |
---|
124 | "' (offending value = '" + s +"')", |
---|
125 | reader.getLocation()); |
---|
126 | } |
---|
127 | final URI uri = new URI(s); |
---|
128 | if (capabilities == null) { |
---|
129 | capabilities = new ArrayList<URI>(); |
---|
130 | } |
---|
131 | capabilities.add(uri); |
---|
132 | } catch (URISyntaxException e) { |
---|
133 | throw new XMLStreamException("Capabilities must be encoded " + |
---|
134 | "as URIs (offending value = '" + s + "')", |
---|
135 | reader.getLocation(), e); |
---|
136 | } |
---|
137 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Capability"); |
---|
138 | } // while |
---|
139 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Capabilities"); |
---|
140 | if ((capabilities == null) || |
---|
141 | (capabilities.indexOf(CAPABILITY_BASIC_SEARCH) == -1)) { |
---|
142 | throw new SRUClientException("Endpoint must support " + |
---|
143 | "'basic-search' (" + CAPABILITY_BASIC_SEARCH + |
---|
144 | ") to conform to CLARIN-FCS specification"); |
---|
145 | } |
---|
146 | |
---|
147 | // SupportedDataViews |
---|
148 | List<DataView> supportedDataViews = null; |
---|
149 | XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
150 | "SupportedDataViews", true); |
---|
151 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
152 | "SupportedDataView", (supportedDataViews == null), true)) { |
---|
153 | final String id = XmlStreamReaderUtils.readAttributeValue( |
---|
154 | reader, null, "id", true); |
---|
155 | if ((id.indexOf(' ') != -1) || (id.indexOf(',') != -1) || |
---|
156 | (id.indexOf(';') != -1)) { |
---|
157 | throw new XMLStreamException("Value of attribute 'id' on " + |
---|
158 | "element '<SupportedDataView>' may not contain the " + |
---|
159 | "characters ',' (comma) or ';' (semicolon) " + |
---|
160 | "or ' ' (space)", reader.getLocation()); |
---|
161 | } |
---|
162 | final DeliveryPolicy policy = parsePolicy(reader); |
---|
163 | reader.next(); // consume start tag |
---|
164 | |
---|
165 | final String type = XmlStreamReaderUtils.readString(reader, true); |
---|
166 | // do some sanity checks ... |
---|
167 | if (supportedDataViews != null) { |
---|
168 | for (DataView dataView : supportedDataViews) { |
---|
169 | if (dataView.getIdentifier().equals(id)) { |
---|
170 | throw new XMLStreamException("Supported data view " + |
---|
171 | "with identifier '" + id + |
---|
172 | "' was already declared", reader.getLocation()); |
---|
173 | } |
---|
174 | if (dataView.getMimeType().equals(type)) { |
---|
175 | throw new XMLStreamException("Supported data view " + |
---|
176 | "with MIME type '" + type + |
---|
177 | "' was already declared", reader.getLocation()); |
---|
178 | } |
---|
179 | } |
---|
180 | } else { |
---|
181 | supportedDataViews = new ArrayList<DataView>(); |
---|
182 | } |
---|
183 | supportedDataViews.add(new DataView(id, type, policy)); |
---|
184 | XmlStreamReaderUtils.readEnd(reader, |
---|
185 | ED_NS_URI, "SupportedDataView"); |
---|
186 | } // while |
---|
187 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, |
---|
188 | "SupportedDataViews", true); |
---|
189 | boolean found = false; |
---|
190 | if (supportedDataViews != null) { |
---|
191 | for (DataView dataView : supportedDataViews) { |
---|
192 | if (MIMETYPE_HITS_DATAVIEW.equals(dataView.getMimeType())) { |
---|
193 | found = true; |
---|
194 | break; |
---|
195 | } |
---|
196 | } |
---|
197 | } |
---|
198 | if (!found) { |
---|
199 | throw new SRUClientException("Endpoint must support " + |
---|
200 | "generic hits dataview (expected MIME type '" + |
---|
201 | MIMETYPE_HITS_DATAVIEW + |
---|
202 | "') to conform to CLARIN-FCS specification"); |
---|
203 | } |
---|
204 | |
---|
205 | // Resources |
---|
206 | final List<ResourceInfo> resources = |
---|
207 | parseResources(reader, 0, maxDepth, supportedDataViews); |
---|
208 | |
---|
209 | // skip over extensions |
---|
210 | while (!XmlStreamReaderUtils.peekEnd(reader, |
---|
211 | ED_NS_URI, "EndpointDescription")) { |
---|
212 | if (reader.isStartElement()) { |
---|
213 | final String namespaceURI = reader.getNamespaceURI(); |
---|
214 | final String localName = reader.getLocalName(); |
---|
215 | logger.debug("skipping over extension with element {{}}{}", |
---|
216 | namespaceURI, localName); |
---|
217 | XmlStreamReaderUtils.skipTag(reader, namespaceURI, localName); |
---|
218 | } |
---|
219 | } |
---|
220 | |
---|
221 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "EndpointDescription"); |
---|
222 | |
---|
223 | return new ClarinFCSEndpointDescription(version, capabilities, |
---|
224 | supportedDataViews, resources); |
---|
225 | } |
---|
226 | |
---|
227 | |
---|
228 | /** |
---|
229 | * Get the maximum resource enumeration parsing depth. The first level is |
---|
230 | * indicate by the value <code>0</code>. |
---|
231 | * |
---|
232 | * @return the default resource parsing depth or <code>-1</code> for |
---|
233 | * infinite. |
---|
234 | */ |
---|
235 | public int getMaximumResourcePArsingDepth() { |
---|
236 | return maxDepth; |
---|
237 | } |
---|
238 | |
---|
239 | |
---|
240 | private static List<ResourceInfo> parseResources(XMLStreamReader reader, |
---|
241 | int depth, int maxDepth, List<DataView> supportedDataviews) |
---|
242 | throws XMLStreamException { |
---|
243 | List<ResourceInfo> resources = null; |
---|
244 | |
---|
245 | XmlStreamReaderUtils.readStart(reader, ED_NS_URI, "Resources", true); |
---|
246 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
247 | "Resource", (resources == null), true)) { |
---|
248 | final String pid = XmlStreamReaderUtils.readAttributeValue(reader, |
---|
249 | null, "pid", true); |
---|
250 | reader.next(); // consume start tag |
---|
251 | |
---|
252 | logger.debug("pid = {}", pid); |
---|
253 | |
---|
254 | final Map<String, String> title = |
---|
255 | parseI18String(reader, "Title", true); |
---|
256 | logger.debug("title: {}", title); |
---|
257 | |
---|
258 | final Map<String, String> description = |
---|
259 | parseI18String(reader, "Description", false); |
---|
260 | logger.debug("description: {}", description); |
---|
261 | |
---|
262 | final String landingPageURI = |
---|
263 | XmlStreamReaderUtils.readContent(reader, ED_NS_URI, |
---|
264 | "LandingPageURI", false); |
---|
265 | logger.debug("landingPageURI: {}", landingPageURI); |
---|
266 | |
---|
267 | List<String> languages = null; |
---|
268 | XmlStreamReaderUtils.readStart(reader, |
---|
269 | ED_NS_URI, "Languages", true); |
---|
270 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
271 | "Language", (languages == null))) { |
---|
272 | final String language = |
---|
273 | XmlStreamReaderUtils.readString(reader, true); |
---|
274 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Language"); |
---|
275 | if (languages == null) { |
---|
276 | languages = new ArrayList<String>(); |
---|
277 | } else { |
---|
278 | for (String l : languages) { |
---|
279 | if (l.equals(language)) { |
---|
280 | throw new XMLStreamException("language '" + |
---|
281 | language + "' was already defined " + |
---|
282 | "in '<Language>'", reader.getLocation()); |
---|
283 | } |
---|
284 | } // for |
---|
285 | } |
---|
286 | languages.add(language); |
---|
287 | } // while |
---|
288 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Languages", true); |
---|
289 | logger.debug("languages: {}", languages); |
---|
290 | |
---|
291 | XmlStreamReaderUtils.readStart(reader, ED_NS_URI, "AvailableDataViews", true, true); |
---|
292 | final String dvs = XmlStreamReaderUtils.readAttributeValue(reader, null, "ref", true); |
---|
293 | reader.next(); // consume start tag |
---|
294 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "AvailableDataViews"); |
---|
295 | |
---|
296 | List<DataView> dataviews = null; |
---|
297 | for (String dv : dvs.split("\\s+")) { |
---|
298 | boolean found = false; |
---|
299 | for (DataView dataview : supportedDataviews) { |
---|
300 | if (dataview.getIdentifier().equals(dv)) { |
---|
301 | found = true; |
---|
302 | if (dataviews == null) { |
---|
303 | dataviews = new ArrayList<DataView>(); |
---|
304 | } |
---|
305 | dataviews.add(dataview); |
---|
306 | break; |
---|
307 | } |
---|
308 | } // for |
---|
309 | if (!found) { |
---|
310 | throw new XMLStreamException("DataView with id '" + dv + |
---|
311 | "' was not declared in <SupportedDataViews>", |
---|
312 | reader.getLocation()); |
---|
313 | } |
---|
314 | } // for |
---|
315 | logger.debug("DataViews: {}", dataviews); |
---|
316 | |
---|
317 | List<ResourceInfo> subResources = null; |
---|
318 | if (XmlStreamReaderUtils.peekStart(reader, |
---|
319 | ED_NS_URI, "Resources")) { |
---|
320 | final int nextDepth = depth + 1; |
---|
321 | if ((maxDepth == INFINITE_MAX_DEPTH) || |
---|
322 | (nextDepth < maxDepth)) { |
---|
323 | subResources = parseResources(reader, nextDepth, |
---|
324 | maxDepth, supportedDataviews); |
---|
325 | } else { |
---|
326 | XmlStreamReaderUtils.skipTag(reader, ED_NS_URI, |
---|
327 | "Resources", true); |
---|
328 | } |
---|
329 | } |
---|
330 | |
---|
331 | while (!XmlStreamReaderUtils.peekEnd(reader, |
---|
332 | ED_NS_URI, "Resource")) { |
---|
333 | if (reader.isStartElement()) { |
---|
334 | final String namespaceURI = reader.getNamespaceURI(); |
---|
335 | final String localName = reader.getLocalName(); |
---|
336 | logger.debug("skipping over extension with element " + |
---|
337 | "{{}}{} (resource)", namespaceURI, localName); |
---|
338 | XmlStreamReaderUtils.skipTag(reader, |
---|
339 | namespaceURI, localName); |
---|
340 | } |
---|
341 | } // while |
---|
342 | |
---|
343 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Resource"); |
---|
344 | |
---|
345 | if (resources == null) { |
---|
346 | resources = new ArrayList<ResourceInfo>(); |
---|
347 | } |
---|
348 | resources.add(new ResourceInfo(pid, title, description, |
---|
349 | landingPageURI, languages, dataviews, subResources)); |
---|
350 | } // while |
---|
351 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Resources"); |
---|
352 | |
---|
353 | return resources; |
---|
354 | } |
---|
355 | |
---|
356 | |
---|
357 | private static Map<String, String> parseI18String(XMLStreamReader reader, |
---|
358 | String localName, boolean required) throws XMLStreamException { |
---|
359 | Map<String, String> result = null; |
---|
360 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, localName, |
---|
361 | ((result == null) && required), true)) { |
---|
362 | final String lang = XmlStreamReaderUtils.readAttributeValue(reader, |
---|
363 | XMLConstants.XML_NS_URI, "lang", true); |
---|
364 | reader.next(); // skip start tag |
---|
365 | final String content = XmlStreamReaderUtils.readString(reader, true); |
---|
366 | if (result == null) { |
---|
367 | result = new HashMap<String, String>(); |
---|
368 | } |
---|
369 | if (result.containsKey(lang)) { |
---|
370 | throw new XMLStreamException("language '" + lang + |
---|
371 | "' already defined for element '<" + localName + ">'", |
---|
372 | reader.getLocation()); |
---|
373 | } else { |
---|
374 | result.put(lang, content); |
---|
375 | } |
---|
376 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, localName); |
---|
377 | } // while |
---|
378 | return result; |
---|
379 | } |
---|
380 | |
---|
381 | |
---|
382 | private static int parseVersion(XMLStreamReader reader) |
---|
383 | throws XMLStreamException { |
---|
384 | try { |
---|
385 | final String s = XmlStreamReaderUtils.readAttributeValue( |
---|
386 | reader, null, "version", true); |
---|
387 | return Integer.parseInt(s); |
---|
388 | } catch (NumberFormatException e) { |
---|
389 | throw new XMLStreamException("Attribute 'version' is not a number", |
---|
390 | reader.getLocation(), e); |
---|
391 | } |
---|
392 | } |
---|
393 | |
---|
394 | |
---|
395 | private static DeliveryPolicy parsePolicy(XMLStreamReader reader) |
---|
396 | throws XMLStreamException { |
---|
397 | final String s = XmlStreamReaderUtils.readAttributeValue(reader, |
---|
398 | null, "delivery-policy", true); |
---|
399 | if ("send-by-default".equals(s)) { |
---|
400 | return DeliveryPolicy.SEND_BY_DEFAULT; |
---|
401 | } else if ("need-to-request".equals(s)) { |
---|
402 | return DeliveryPolicy.NEED_TO_REQUEST; |
---|
403 | } else { |
---|
404 | throw new XMLStreamException("Unexpected value '" + s + |
---|
405 | "' for attribute 'delivery-policy' on " + |
---|
406 | "element '<SupportedDataView>'", reader.getLocation()); |
---|
407 | } |
---|
408 | } |
---|
409 | |
---|
410 | } // class ClarinFCSEndpointDescriptionParser |
---|