1 | /** |
---|
2 | * This software is copyright (c) 2012-2014 by |
---|
3 | * - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) |
---|
4 | * This is free software. You can redistribute it |
---|
5 | * and/or modify it under the terms described in |
---|
6 | * the GNU General Public License v3 of which you |
---|
7 | * should have received a copy. Otherwise you can download |
---|
8 | * it from |
---|
9 | * |
---|
10 | * http://www.gnu.org/licenses/gpl-3.0.txt |
---|
11 | * |
---|
12 | * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) |
---|
13 | * |
---|
14 | * @license http://www.gnu.org/licenses/gpl-3.0.txt |
---|
15 | * GNU General Public License v3 |
---|
16 | */ |
---|
17 | package eu.clarin.sru.client.fcs; |
---|
18 | |
---|
19 | import java.net.URI; |
---|
20 | import java.net.URISyntaxException; |
---|
21 | import java.util.ArrayList; |
---|
22 | import java.util.HashMap; |
---|
23 | import java.util.List; |
---|
24 | import java.util.Map; |
---|
25 | |
---|
26 | import javax.xml.XMLConstants; |
---|
27 | import javax.xml.namespace.QName; |
---|
28 | import javax.xml.stream.XMLStreamException; |
---|
29 | import javax.xml.stream.XMLStreamReader; |
---|
30 | |
---|
31 | import org.slf4j.Logger; |
---|
32 | import org.slf4j.LoggerFactory; |
---|
33 | |
---|
34 | import eu.clarin.sru.client.SRUClientException; |
---|
35 | import eu.clarin.sru.client.SRUExtraResponseData; |
---|
36 | import eu.clarin.sru.client.SRUExtraResponseDataParser; |
---|
37 | import eu.clarin.sru.client.XmlStreamReaderUtils; |
---|
38 | import eu.clarin.sru.client.fcs.ClarinFCSEndpointDescription.DataView; |
---|
39 | import eu.clarin.sru.client.fcs.ClarinFCSEndpointDescription.DataView.DeliveryPolicy; |
---|
40 | import eu.clarin.sru.client.fcs.ClarinFCSEndpointDescription.ResourceInfo; |
---|
41 | |
---|
42 | |
---|
43 | /** |
---|
44 | * An extra response data parser for parsing CLARIN-FCS endpoint descriptions. |
---|
45 | */ |
---|
46 | public class ClarinFCSEndpointDescriptionParser implements |
---|
47 | SRUExtraResponseDataParser { |
---|
48 | public static final int INFINITE_MAX_DEPTH = -1; |
---|
49 | public static final int DEFAULT_MAX_DEPTH = INFINITE_MAX_DEPTH; |
---|
50 | private static final Logger logger = |
---|
51 | LoggerFactory.getLogger(ClarinFCSClientBuilder.class); |
---|
52 | private static final String ED_NS_URI = |
---|
53 | "http://clarin.eu/fcs/endpoint-description"; |
---|
54 | private static final QName ED_ROOT_ELEMENT = |
---|
55 | new QName(ED_NS_URI, "EndpointDescription"); |
---|
56 | private static final int EXPECTED_VERSION = 1; |
---|
57 | private static final String CAPABILITY_PREFIX = |
---|
58 | "http://clarin.eu/fcs/capability/"; |
---|
59 | private static final URI CAPABILITY_BASIC_SEARCH = |
---|
60 | URI.create("http://clarin.eu/fcs/capability/basic-search"); |
---|
61 | private static final String MIMETYPE_HITS_DATAVIEW = |
---|
62 | "application/x-clarin-fcs-hits+xml"; |
---|
63 | private final int maxDepth; |
---|
64 | |
---|
65 | |
---|
66 | public ClarinFCSEndpointDescriptionParser() { |
---|
67 | this(DEFAULT_MAX_DEPTH); |
---|
68 | } |
---|
69 | |
---|
70 | |
---|
71 | public ClarinFCSEndpointDescriptionParser(int maxDepth) { |
---|
72 | if (maxDepth < -1) { |
---|
73 | throw new IllegalArgumentException("maxDepth < -1"); |
---|
74 | } |
---|
75 | this.maxDepth = maxDepth; |
---|
76 | } |
---|
77 | |
---|
78 | |
---|
79 | @Override |
---|
80 | public boolean supports(QName name) { |
---|
81 | return ED_ROOT_ELEMENT.equals(name); |
---|
82 | } |
---|
83 | |
---|
84 | |
---|
85 | @Override |
---|
86 | public SRUExtraResponseData parse(XMLStreamReader reader) |
---|
87 | throws XMLStreamException, SRUClientException { |
---|
88 | final int version = parseVersion(reader); |
---|
89 | if (version != EXPECTED_VERSION) { |
---|
90 | throw new SRUClientException("Attribute 'version' of " + |
---|
91 | "element '<EndpointDescription>' must be of value '1'"); |
---|
92 | } |
---|
93 | logger.error("VERSION: {}", version); |
---|
94 | reader.next(); // consume start tag |
---|
95 | |
---|
96 | // Capabilities |
---|
97 | List<URI> capabilities = null; |
---|
98 | XmlStreamReaderUtils.readStart(reader, ED_NS_URI, "Capabilities", true); |
---|
99 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
100 | "Capability", (capabilities == null))) { |
---|
101 | final String s = XmlStreamReaderUtils.readString(reader, true); |
---|
102 | try { |
---|
103 | if (!s.startsWith(CAPABILITY_PREFIX)) { |
---|
104 | throw new XMLStreamException("Capabilites must start " + |
---|
105 | "with prefix '" + CAPABILITY_PREFIX + |
---|
106 | "' (offending value = '" + s +"')", |
---|
107 | reader.getLocation()); |
---|
108 | } |
---|
109 | final URI uri = new URI(s); |
---|
110 | if (capabilities == null) { |
---|
111 | capabilities = new ArrayList<URI>(); |
---|
112 | } |
---|
113 | capabilities.add(uri); |
---|
114 | } catch (URISyntaxException e) { |
---|
115 | throw new XMLStreamException("Capabilities must be encoded " + |
---|
116 | "as URIs (offending value = '" + s + "')", |
---|
117 | reader.getLocation(), e); |
---|
118 | } |
---|
119 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Capability"); |
---|
120 | } // while |
---|
121 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Capabilities"); |
---|
122 | if ((capabilities == null) || |
---|
123 | (capabilities.indexOf(CAPABILITY_BASIC_SEARCH) == -1)) { |
---|
124 | throw new SRUClientException("Endpoint must support " + |
---|
125 | "'basic-search' (" + CAPABILITY_BASIC_SEARCH + |
---|
126 | ") to conform to CLARIN-FCS specification"); |
---|
127 | } |
---|
128 | |
---|
129 | // SupportedDataViews |
---|
130 | List<DataView> supportedDataViews = null; |
---|
131 | XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
132 | "SupportedDataViews", true); |
---|
133 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
134 | "SupportedDataView", (supportedDataViews == null), true)) { |
---|
135 | final String id = XmlStreamReaderUtils.readAttributeValue( |
---|
136 | reader, null, "id", true); |
---|
137 | if ((id.indexOf(' ') != -1) || (id.indexOf(',') != -1) || |
---|
138 | (id.indexOf(';') != -1)) { |
---|
139 | throw new XMLStreamException("Value of attribute 'id' on " + |
---|
140 | "element '<SupportedDataView>' may not contain the " + |
---|
141 | "characters ',' (comma) or ';' (semicolon) " + |
---|
142 | "or ' ' (space)", reader.getLocation()); |
---|
143 | } |
---|
144 | final DeliveryPolicy policy = parsePolicy(reader); |
---|
145 | reader.next(); // consume start tag |
---|
146 | |
---|
147 | final String type = XmlStreamReaderUtils.readString(reader, true); |
---|
148 | // do some sanity checks ... |
---|
149 | if (supportedDataViews != null) { |
---|
150 | for (DataView dataView : supportedDataViews) { |
---|
151 | if (dataView.getIdentifier().equals(id)) { |
---|
152 | throw new XMLStreamException("Supported data view " + |
---|
153 | "with identifier '" + id + |
---|
154 | "' was already declared", reader.getLocation()); |
---|
155 | } |
---|
156 | if (dataView.getMimeType().equals(type)) { |
---|
157 | throw new XMLStreamException("Supported data view " + |
---|
158 | "with MIME type '" + type + |
---|
159 | "' was already declared", reader.getLocation()); |
---|
160 | } |
---|
161 | } |
---|
162 | } else { |
---|
163 | supportedDataViews = new ArrayList<DataView>(); |
---|
164 | } |
---|
165 | supportedDataViews.add(new DataView(id, type, policy)); |
---|
166 | XmlStreamReaderUtils.readEnd(reader, |
---|
167 | ED_NS_URI, "SupportedDataView"); |
---|
168 | } // while |
---|
169 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, |
---|
170 | "SupportedDataViews", true); |
---|
171 | boolean found = false; |
---|
172 | if (supportedDataViews != null) { |
---|
173 | for (DataView dataView : supportedDataViews) { |
---|
174 | if (MIMETYPE_HITS_DATAVIEW.equals(dataView.getMimeType())) { |
---|
175 | found = true; |
---|
176 | break; |
---|
177 | } |
---|
178 | } |
---|
179 | } |
---|
180 | if (!found) { |
---|
181 | throw new SRUClientException("Endpoint must support " + |
---|
182 | "generic hits dataview (expected MIME type '" + |
---|
183 | MIMETYPE_HITS_DATAVIEW + |
---|
184 | "') to conform to CLARIN-FCS specification"); |
---|
185 | } |
---|
186 | |
---|
187 | // Resources |
---|
188 | final List<ResourceInfo> resources = |
---|
189 | parseResources(reader, 0, supportedDataViews); |
---|
190 | |
---|
191 | // skip over extensions |
---|
192 | while (!XmlStreamReaderUtils.peekEnd(reader, |
---|
193 | ED_NS_URI, "EndpointDescription")) { |
---|
194 | if (reader.isStartElement()) { |
---|
195 | final String namespaceURI = reader.getNamespaceURI(); |
---|
196 | final String localName = reader.getLocalName(); |
---|
197 | logger.debug("skipping over extension with element {{}}{}", |
---|
198 | namespaceURI, localName); |
---|
199 | XmlStreamReaderUtils.skipTag(reader, namespaceURI, localName); |
---|
200 | } |
---|
201 | } |
---|
202 | |
---|
203 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "EndpointDescription"); |
---|
204 | |
---|
205 | return new ClarinFCSEndpointDescription(version, capabilities, |
---|
206 | supportedDataViews, resources); |
---|
207 | } |
---|
208 | |
---|
209 | |
---|
210 | private List<ResourceInfo> parseResources(XMLStreamReader reader, int depth, |
---|
211 | List<DataView> supportedDataviews) throws XMLStreamException { |
---|
212 | List<ResourceInfo> resources = null; |
---|
213 | |
---|
214 | XmlStreamReaderUtils.readStart(reader, ED_NS_URI, "Resources", true); |
---|
215 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
216 | "Resource", (resources == null), true)) { |
---|
217 | final String pid = XmlStreamReaderUtils.readAttributeValue(reader, |
---|
218 | null, "pid", true); |
---|
219 | reader.next(); // consume start tag |
---|
220 | |
---|
221 | logger.debug("pid = {}", pid); |
---|
222 | |
---|
223 | final Map<String, String> title = |
---|
224 | parseI18String(reader, "Title", true); |
---|
225 | logger.debug("title: {}", title); |
---|
226 | |
---|
227 | final Map<String, String> description = |
---|
228 | parseI18String(reader, "Description", false); |
---|
229 | logger.debug("description: {}", description); |
---|
230 | |
---|
231 | final String landingPageURI = |
---|
232 | XmlStreamReaderUtils.readContent(reader, ED_NS_URI, |
---|
233 | "LandingPageURI", false); |
---|
234 | logger.debug("landingPageURI: {}", landingPageURI); |
---|
235 | |
---|
236 | List<String> languages = null; |
---|
237 | XmlStreamReaderUtils.readStart(reader, |
---|
238 | ED_NS_URI, "Languages", true); |
---|
239 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, |
---|
240 | "Language", (languages == null))) { |
---|
241 | final String language = |
---|
242 | XmlStreamReaderUtils.readString(reader, true); |
---|
243 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Language"); |
---|
244 | if (languages == null) { |
---|
245 | languages = new ArrayList<String>(); |
---|
246 | } else { |
---|
247 | for (String l : languages) { |
---|
248 | if (l.equals(language)) { |
---|
249 | throw new XMLStreamException("language '" + |
---|
250 | language + "' was already defined " + |
---|
251 | "in '<Language>'", reader.getLocation()); |
---|
252 | } |
---|
253 | } // for |
---|
254 | } |
---|
255 | languages.add(language); |
---|
256 | } // while |
---|
257 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Languages", true); |
---|
258 | logger.debug("languages: {}", languages); |
---|
259 | |
---|
260 | XmlStreamReaderUtils.readStart(reader, ED_NS_URI, "AvailableDataViews", true, true); |
---|
261 | final String dvs = XmlStreamReaderUtils.readAttributeValue(reader, null, "ref", true); |
---|
262 | reader.next(); // consume start tag |
---|
263 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "AvailableDataViews"); |
---|
264 | |
---|
265 | List<DataView> dataviews = null; |
---|
266 | for (String dv : dvs.split("\\s+")) { |
---|
267 | boolean found = false; |
---|
268 | for (DataView dataview : supportedDataviews) { |
---|
269 | if (dataview.getIdentifier().equals(dv)) { |
---|
270 | found = true; |
---|
271 | if (dataviews == null) { |
---|
272 | dataviews = new ArrayList<DataView>(); |
---|
273 | } |
---|
274 | dataviews.add(dataview); |
---|
275 | break; |
---|
276 | } |
---|
277 | } // for |
---|
278 | if (!found) { |
---|
279 | throw new XMLStreamException("DataView with id '" + dv + |
---|
280 | "' was not declared in <SupportedDataViews>", |
---|
281 | reader.getLocation()); |
---|
282 | } |
---|
283 | } // for |
---|
284 | logger.debug("DataViews: {}", dataviews); |
---|
285 | |
---|
286 | List<ResourceInfo> subResources = null; |
---|
287 | if (XmlStreamReaderUtils.peekStart(reader, ED_NS_URI, "Resources")) { |
---|
288 | final int nextDepth = depth + 1; |
---|
289 | if ((maxDepth == INFINITE_MAX_DEPTH) || (nextDepth < maxDepth)) { |
---|
290 | subResources = parseResources(reader, nextDepth, |
---|
291 | supportedDataviews); |
---|
292 | } else { |
---|
293 | XmlStreamReaderUtils.skipTag(reader, ED_NS_URI, |
---|
294 | "Resources", true); |
---|
295 | } |
---|
296 | } |
---|
297 | |
---|
298 | while (!XmlStreamReaderUtils.peekEnd(reader, |
---|
299 | ED_NS_URI, "Resource")) { |
---|
300 | if (reader.isStartElement()) { |
---|
301 | final String namespaceURI = reader.getNamespaceURI(); |
---|
302 | final String localName = reader.getLocalName(); |
---|
303 | logger.debug("skipping over extension with element " + |
---|
304 | "{{}}{} (resource)", namespaceURI, localName); |
---|
305 | XmlStreamReaderUtils.skipTag(reader, |
---|
306 | namespaceURI, localName); |
---|
307 | } |
---|
308 | } // while |
---|
309 | |
---|
310 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Resource"); |
---|
311 | |
---|
312 | if (resources == null) { |
---|
313 | resources = new ArrayList<ResourceInfo>(); |
---|
314 | } |
---|
315 | resources.add(new ResourceInfo(pid, title, description, |
---|
316 | landingPageURI, languages, dataviews, subResources)); |
---|
317 | } // while |
---|
318 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, "Resources"); |
---|
319 | |
---|
320 | return resources; |
---|
321 | } |
---|
322 | |
---|
323 | |
---|
324 | private static Map<String, String> parseI18String(XMLStreamReader reader, |
---|
325 | String localName, boolean required) throws XMLStreamException { |
---|
326 | Map<String, String> result = null; |
---|
327 | while (XmlStreamReaderUtils.readStart(reader, ED_NS_URI, localName, |
---|
328 | ((result == null) && required), true)) { |
---|
329 | final String lang = XmlStreamReaderUtils.readAttributeValue(reader, |
---|
330 | XMLConstants.XML_NS_URI, "lang", true); |
---|
331 | reader.next(); // skip start tag |
---|
332 | final String content = XmlStreamReaderUtils.readString(reader, true); |
---|
333 | if (result == null) { |
---|
334 | result = new HashMap<String, String>(); |
---|
335 | } |
---|
336 | if (result.containsKey(lang)) { |
---|
337 | throw new XMLStreamException("language '" + lang + |
---|
338 | "' already defined for element '<" + localName + ">'", |
---|
339 | reader.getLocation()); |
---|
340 | } else { |
---|
341 | result.put(lang, content); |
---|
342 | } |
---|
343 | XmlStreamReaderUtils.readEnd(reader, ED_NS_URI, localName); |
---|
344 | } // while |
---|
345 | return result; |
---|
346 | } |
---|
347 | |
---|
348 | |
---|
349 | private static int parseVersion(XMLStreamReader reader) |
---|
350 | throws XMLStreamException { |
---|
351 | try { |
---|
352 | final String s = XmlStreamReaderUtils.readAttributeValue( |
---|
353 | reader, null, "version", true); |
---|
354 | return Integer.parseInt(s); |
---|
355 | } catch (NumberFormatException e) { |
---|
356 | throw new XMLStreamException("Attribute 'version' is not a number", |
---|
357 | reader.getLocation(), e); |
---|
358 | } |
---|
359 | } |
---|
360 | |
---|
361 | |
---|
362 | private static DeliveryPolicy parsePolicy(XMLStreamReader reader) |
---|
363 | throws XMLStreamException { |
---|
364 | final String s = XmlStreamReaderUtils.readAttributeValue(reader, |
---|
365 | null, "delivery-policy", true); |
---|
366 | if ("send-by-default".equals(s)) { |
---|
367 | return DeliveryPolicy.SEND_BY_DEFAULT; |
---|
368 | } else if ("need-to-request".equals(s)) { |
---|
369 | return DeliveryPolicy.NEED_TO_REQUEST; |
---|
370 | } else { |
---|
371 | throw new XMLStreamException("Unexpected value '" + s + |
---|
372 | "' for attribute 'delivery-policy' on " + |
---|
373 | "element '<SupportedDataView>'", reader.getLocation()); |
---|
374 | } |
---|
375 | } |
---|
376 | |
---|
377 | } // class ClarinFCSEndpointDescriptionParser |
---|