1 | /** |
---|
2 | * This software is copyright (c) 2013-2022 by |
---|
3 | * - Leibniz-Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) |
---|
4 | * This is free software. You can redistribute it |
---|
5 | * and/or modify it under the terms described in |
---|
6 | * the GNU General Public License v3 of which you |
---|
7 | * should have received a copy. Otherwise you can download |
---|
8 | * it from |
---|
9 | * |
---|
10 | * http://www.gnu.org/licenses/gpl-3.0.txt |
---|
11 | * |
---|
12 | * @copyright Leibniz-Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) |
---|
13 | * |
---|
14 | * @license http://www.gnu.org/licenses/gpl-3.0.txt |
---|
15 | * GNU General Public License v3 |
---|
16 | */ |
---|
17 | package eu.clarin.sru.server.fcs.utils; |
---|
18 | |
---|
19 | import java.io.IOException; |
---|
20 | import java.net.URI; |
---|
21 | import java.net.URISyntaxException; |
---|
22 | import java.net.URL; |
---|
23 | import java.util.ArrayList; |
---|
24 | import java.util.HashMap; |
---|
25 | import java.util.HashSet; |
---|
26 | import java.util.Iterator; |
---|
27 | import java.util.List; |
---|
28 | import java.util.Map; |
---|
29 | import java.util.Set; |
---|
30 | |
---|
31 | import javax.xml.XMLConstants; |
---|
32 | import javax.xml.namespace.NamespaceContext; |
---|
33 | import javax.xml.parsers.DocumentBuilder; |
---|
34 | import javax.xml.parsers.DocumentBuilderFactory; |
---|
35 | import javax.xml.parsers.ParserConfigurationException; |
---|
36 | import javax.xml.xpath.XPath; |
---|
37 | import javax.xml.xpath.XPathConstants; |
---|
38 | import javax.xml.xpath.XPathExpression; |
---|
39 | import javax.xml.xpath.XPathExpressionException; |
---|
40 | import javax.xml.xpath.XPathFactory; |
---|
41 | |
---|
42 | import org.slf4j.Logger; |
---|
43 | import org.slf4j.LoggerFactory; |
---|
44 | import org.w3c.dom.Document; |
---|
45 | import org.w3c.dom.Element; |
---|
46 | import org.w3c.dom.Node; |
---|
47 | import org.w3c.dom.NodeList; |
---|
48 | import org.xml.sax.SAXException; |
---|
49 | |
---|
50 | import eu.clarin.sru.server.SRUConfigException; |
---|
51 | import eu.clarin.sru.server.fcs.DataView; |
---|
52 | import eu.clarin.sru.server.fcs.DataView.DeliveryPolicy; |
---|
53 | import eu.clarin.sru.server.fcs.EndpointDescription; |
---|
54 | import eu.clarin.sru.server.fcs.Layer; |
---|
55 | import eu.clarin.sru.server.fcs.ResourceInfo; |
---|
56 | |
---|
57 | |
---|
58 | /** |
---|
59 | * A parser that parses an XML file and produces an endpoint description with |
---|
60 | * static list of resource info records. The XML file has the same format as the |
---|
61 | * result format defined for endpoint description of the CLARIN-FCS |
---|
62 | * specification. The {@link #parse(URL)} method returns a |
---|
63 | * {@link SimpleEndpointDescription} instance. |
---|
64 | * |
---|
65 | * @see EndpointDescription |
---|
66 | * @see SimpleEndpointDescription |
---|
67 | */ |
---|
68 | public class SimpleEndpointDescriptionParser { |
---|
69 | private static final String NS = |
---|
70 | "http://clarin.eu/fcs/endpoint-description"; |
---|
71 | private static final String NS_LEGACY = |
---|
72 | "http://clarin.eu/fcs/1.0/resource-info"; |
---|
73 | private static final URI CAP_BASIC_SEARCH = |
---|
74 | URI.create("http://clarin.eu/fcs/capability/basic-search"); |
---|
75 | private static final URI CAP_ADVANCED_SEARCH = |
---|
76 | URI.create("http://clarin.eu/fcs/capability/advanced-search"); |
---|
77 | private static final String MIMETYPE_HITS = "application/x-clarin-fcs-hits+xml"; |
---|
78 | private static final String MIMETYPE_ADV = "application/x-clarin-fcs-adv+xml"; |
---|
79 | private static final String LANG_EN = "en"; |
---|
80 | private static final String POLICY_SEND_DEFAULT = "send-by-default"; |
---|
81 | private static final String POLICY_NEED_REQUEST = "need-to-request"; |
---|
82 | private static final String LAYER_ENCODING_VALUE = "value"; |
---|
83 | private static final String LAYER_ENCODING_EMPTY = "empty"; |
---|
84 | private static final Logger logger = |
---|
85 | LoggerFactory.getLogger(SimpleEndpointDescriptionParser.class); |
---|
86 | |
---|
87 | |
---|
88 | /** |
---|
89 | * Parse an XML file and return a static list of resource info records. |
---|
90 | * |
---|
91 | * @param url |
---|
92 | * the URI pointing to the file to be parsed |
---|
93 | * @return an {@link EndpointDescription} instance |
---|
94 | * @throws SRUConfigException |
---|
95 | * if an error occurred |
---|
96 | */ |
---|
97 | public static EndpointDescription parse(URL url) throws SRUConfigException { |
---|
98 | if (url == null) { |
---|
99 | throw new NullPointerException("url == null"); |
---|
100 | } |
---|
101 | |
---|
102 | logger.debug("parsing endpoint description from: {}", url); |
---|
103 | |
---|
104 | try { |
---|
105 | DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); |
---|
106 | dbf.setNamespaceAware(true); |
---|
107 | dbf.setCoalescing(true); |
---|
108 | DocumentBuilder db = dbf.newDocumentBuilder(); |
---|
109 | Document doc = db.parse(url.openStream()); |
---|
110 | |
---|
111 | /* |
---|
112 | * Detect for deprecated resource-info catalog files and bail, if necessary |
---|
113 | */ |
---|
114 | checkLegacyMode(doc, url); |
---|
115 | |
---|
116 | /* |
---|
117 | * Parse on and create endpoint description ... |
---|
118 | */ |
---|
119 | return parseEndpointDescription(doc); |
---|
120 | } catch (ParserConfigurationException e) { |
---|
121 | throw new SRUConfigException("internal error", e); |
---|
122 | } catch (SAXException e) { |
---|
123 | throw new SRUConfigException("parsing error", e); |
---|
124 | } catch (IOException e) { |
---|
125 | throw new SRUConfigException("error reading file", e); |
---|
126 | } catch (XPathExpressionException e) { |
---|
127 | throw new SRUConfigException("internal error", e); |
---|
128 | } |
---|
129 | } |
---|
130 | |
---|
131 | |
---|
132 | private static EndpointDescription parseEndpointDescription(Document doc) |
---|
133 | throws SRUConfigException, XPathExpressionException { |
---|
134 | XPathFactory factory = XPathFactory.newInstance(); |
---|
135 | XPath xpath = factory.newXPath(); |
---|
136 | |
---|
137 | xpath.setNamespaceContext(new NamespaceContext() { |
---|
138 | @Override |
---|
139 | public Iterator<String> getPrefixes(String namespaceURI) { |
---|
140 | throw new UnsupportedOperationException(); |
---|
141 | } |
---|
142 | |
---|
143 | @Override |
---|
144 | public String getPrefix(String namespaceURI) { |
---|
145 | throw new UnsupportedOperationException(); |
---|
146 | } |
---|
147 | |
---|
148 | @Override |
---|
149 | public String getNamespaceURI(String prefix) { |
---|
150 | if (prefix == null) { |
---|
151 | throw new NullPointerException("prefix == null"); |
---|
152 | } |
---|
153 | if (prefix.equals("ed")) { |
---|
154 | return NS; |
---|
155 | } else if (prefix.equals(XMLConstants.XML_NS_PREFIX)) { |
---|
156 | return XMLConstants.XML_NS_URI; |
---|
157 | } else { |
---|
158 | return XMLConstants.NULL_NS_URI; |
---|
159 | } |
---|
160 | } |
---|
161 | }); |
---|
162 | |
---|
163 | // version |
---|
164 | int version = -1; |
---|
165 | XPathExpression exp = |
---|
166 | xpath.compile("//ed:EndpointDescription/@version"); |
---|
167 | String v = (String) exp.evaluate(doc, XPathConstants.STRING); |
---|
168 | if (v != null) { |
---|
169 | try { |
---|
170 | version = Integer.parseInt(v); |
---|
171 | if ((version != 1) && (version != 2)) { |
---|
172 | throw new SRUConfigException("Attribute @version " + |
---|
173 | "element <EndpointDescription> must have a " + |
---|
174 | "value of either '1' or '2' "); |
---|
175 | } |
---|
176 | } catch (NumberFormatException e) { |
---|
177 | throw new SRUConfigException("Cannot parse version number", e); |
---|
178 | } |
---|
179 | } |
---|
180 | if (version == -1) { |
---|
181 | throw new SRUConfigException("Attribute @version missing on "+ |
---|
182 | "element <EndpointDescription>"); |
---|
183 | } |
---|
184 | logger.debug("endpoint description version is {}", version); |
---|
185 | |
---|
186 | // capabilities |
---|
187 | List<URI> capabilities = new ArrayList<>(); |
---|
188 | exp = xpath.compile("//ed:Capabilities/ed:Capability"); |
---|
189 | NodeList list = |
---|
190 | (NodeList) exp.evaluate(doc, XPathConstants.NODESET); |
---|
191 | if ((list != null) && (list.getLength() > 0)) { |
---|
192 | logger.debug("parsing capabilities"); |
---|
193 | for (int i = 0; i < list.getLength(); i++) { |
---|
194 | String s = list.item(i).getTextContent().trim(); |
---|
195 | try { |
---|
196 | URI uri = new URI(s); |
---|
197 | if (!capabilities.contains(uri)) { |
---|
198 | capabilities.add(uri); |
---|
199 | } else { |
---|
200 | logger.warn("ignoring duplicate capability " + |
---|
201 | "entry for '{}'", uri); |
---|
202 | } |
---|
203 | } catch (URISyntaxException e) { |
---|
204 | throw new SRUConfigException("capability is not encoded " + |
---|
205 | "as a proper URI: " + s); |
---|
206 | } |
---|
207 | } |
---|
208 | } else { |
---|
209 | logger.warn("No capabilities where defined in " + |
---|
210 | "endpoint configuration"); |
---|
211 | } |
---|
212 | if (!capabilities.contains(CAP_BASIC_SEARCH)) { |
---|
213 | logger.warn("capability '{}' was not defined in endpoint " + |
---|
214 | "description; it was added to meet the specification. Please " + |
---|
215 | "update your endpoint description!", CAP_BASIC_SEARCH); |
---|
216 | capabilities.add(CAP_BASIC_SEARCH); |
---|
217 | } |
---|
218 | if (capabilities.contains(CAP_ADVANCED_SEARCH) && (version < 2)) { |
---|
219 | logger.warn("Endpoint description is declared as version " + |
---|
220 | "FCS 1.0 (@version = 1), but contains support for " + |
---|
221 | "Advanced Search in capabilities list! FCS 1.0 only " + |
---|
222 | "supports Basic Search"); |
---|
223 | } |
---|
224 | logger.debug("CAPS:'{}'", capabilities); |
---|
225 | |
---|
226 | // used to check for uniqueness of id attribute |
---|
227 | final Set<String> xml_ids = new HashSet<>(); |
---|
228 | |
---|
229 | // supported data views |
---|
230 | List<DataView> supportedDataViews = new ArrayList<>(); |
---|
231 | exp = xpath.compile("//ed:SupportedDataViews/ed:SupportedDataView"); |
---|
232 | list = (NodeList) exp.evaluate(doc, XPathConstants.NODESET); |
---|
233 | if ((list != null) && (list.getLength() > 0)) { |
---|
234 | logger.debug("parsing supported data views"); |
---|
235 | for (int i = 0; i < list.getLength(); i++) { |
---|
236 | Element item = (Element) list.item(i); |
---|
237 | String id = getAttribute(item, "id"); |
---|
238 | if (id == null) { |
---|
239 | throw new SRUConfigException("Element <SupportedDataView> " |
---|
240 | + "must have a proper 'id' attribute"); |
---|
241 | } |
---|
242 | |
---|
243 | if (xml_ids.contains(id)) { |
---|
244 | throw new SRUConfigException("The value of attribute " + |
---|
245 | "'id' of element <SupportedDataView> must be " + |
---|
246 | "unique: " + id); |
---|
247 | } |
---|
248 | xml_ids.add(id); |
---|
249 | |
---|
250 | String p = getAttribute(item, "delivery-policy"); |
---|
251 | if (p == null) { |
---|
252 | throw new SRUConfigException("Element <SupportedDataView> " |
---|
253 | + "must have a 'delivery-policy' attribute"); |
---|
254 | } |
---|
255 | DeliveryPolicy policy = null; |
---|
256 | if (POLICY_SEND_DEFAULT.equals(p)) { |
---|
257 | policy = DeliveryPolicy.SEND_BY_DEFAULT; |
---|
258 | } else if (POLICY_NEED_REQUEST.equals(p)) { |
---|
259 | policy = DeliveryPolicy.NEED_TO_REQUEST; |
---|
260 | } else { |
---|
261 | throw new SRUConfigException("Invalid value '" + p + |
---|
262 | "' for attribute 'delivery-policy' on element " + |
---|
263 | "<SupportedDataView>"); |
---|
264 | } |
---|
265 | String mimeType = item.getTextContent(); |
---|
266 | if (mimeType != null) { |
---|
267 | mimeType = mimeType.trim(); |
---|
268 | if (mimeType.isEmpty()) { |
---|
269 | mimeType = null; |
---|
270 | } |
---|
271 | } |
---|
272 | if (mimeType == null) { |
---|
273 | throw new SRUConfigException("Element <SupportedDataView> " |
---|
274 | + "must contain a MIME-type as content"); |
---|
275 | } |
---|
276 | // check for duplicate entries ... |
---|
277 | for (DataView dataView : supportedDataViews) { |
---|
278 | if (id.equals(dataView.getIdentifier())) { |
---|
279 | throw new SRUConfigException( |
---|
280 | "A <SupportedDataView> with " + "the id '" + |
---|
281 | id + "' is already defined!"); |
---|
282 | } |
---|
283 | if (mimeType.equals(dataView.getMimeType())) { |
---|
284 | throw new SRUConfigException( |
---|
285 | "A <SupportedDataView> with " + |
---|
286 | "the MIME-type '" + mimeType + |
---|
287 | "' is already defined!"); |
---|
288 | } |
---|
289 | } |
---|
290 | supportedDataViews.add(new DataView(id, mimeType, policy)); |
---|
291 | } |
---|
292 | } else { |
---|
293 | logger.error("Endpoint configuration contains no valid " + |
---|
294 | "information about supported data views"); |
---|
295 | throw new SRUConfigException("Endpoint configuration contains " + |
---|
296 | "no valid information about supported data views"); |
---|
297 | } |
---|
298 | |
---|
299 | logger.debug("DV: {}", supportedDataViews); |
---|
300 | |
---|
301 | // sanity check on data views |
---|
302 | boolean hasHitsView = false; |
---|
303 | boolean hasAdvView = false; |
---|
304 | |
---|
305 | for (DataView dataView : supportedDataViews) { |
---|
306 | if (dataView.getMimeType().equals(MIMETYPE_HITS)) { |
---|
307 | hasHitsView = true; |
---|
308 | } else if (dataView.getMimeType().equals(MIMETYPE_ADV)) { |
---|
309 | hasAdvView = true; |
---|
310 | } |
---|
311 | } |
---|
312 | if (!hasHitsView) { |
---|
313 | throw new SRUConfigException("Generic Hits Data View (" + |
---|
314 | MIMETYPE_HITS + ") was not declared in <SupportedDataViews>"); |
---|
315 | } |
---|
316 | if (capabilities.contains(CAP_ADVANCED_SEARCH) && !hasAdvView) { |
---|
317 | throw new SRUConfigException("Endpoint claimes to support " + |
---|
318 | "Advanced FCS but does not declare Advanced Data View (" + |
---|
319 | MIMETYPE_ADV + ") in <SupportedDataViews>"); |
---|
320 | } |
---|
321 | |
---|
322 | // supported layers |
---|
323 | List<Layer> supportedLayers = null; |
---|
324 | exp = xpath.compile("//ed:SupportedLayers/ed:SupportedLayer"); |
---|
325 | list = (NodeList) exp.evaluate(doc, XPathConstants.NODESET); |
---|
326 | if ((list != null) && (list.getLength() > 0)) { |
---|
327 | logger.debug("parsing supported layers"); |
---|
328 | for (int i = 0; i < list.getLength(); i++) { |
---|
329 | Element item = (Element) list.item(i); |
---|
330 | String id = getAttribute(item, "id"); |
---|
331 | if (id == null) { |
---|
332 | throw new SRUConfigException("Element <SupportedLayer> " |
---|
333 | + "must have a proper 'id' attribute"); |
---|
334 | } |
---|
335 | |
---|
336 | if (xml_ids.contains(id)) { |
---|
337 | throw new SRUConfigException("The value of attribute " + |
---|
338 | "'id' of element <SupportedLayer> must be " + |
---|
339 | "unique: " + id); |
---|
340 | } |
---|
341 | xml_ids.add(id); |
---|
342 | |
---|
343 | String s = getAttribute(item, "result-id"); |
---|
344 | if (s == null) { |
---|
345 | throw new SRUConfigException("Element <SupportedLayer> " |
---|
346 | + "must have a proper 'result-id' attribute"); |
---|
347 | } |
---|
348 | URI resultId = null; |
---|
349 | try { |
---|
350 | resultId = new URI(s); |
---|
351 | } catch (URISyntaxException e) { |
---|
352 | throw new SRUConfigException("Attribute 'result-id' on " + |
---|
353 | "Element <SupportedLayer> is not encoded " + |
---|
354 | "as proper URI: " + s); |
---|
355 | } |
---|
356 | |
---|
357 | String type = cleanString(item.getTextContent()); |
---|
358 | if ((type != null) && !type.isEmpty()) { |
---|
359 | // sanity check on layer types |
---|
360 | if (!(type.equals("text") || |
---|
361 | type.equals("lemma") || |
---|
362 | type.equals("pos") || |
---|
363 | type.equals("orth") || |
---|
364 | type.equals("norm") || |
---|
365 | type.equals("phonetic") || |
---|
366 | type.startsWith("x-"))) { |
---|
367 | logger.warn("layer type '{}' is not defined by specification", type); |
---|
368 | } |
---|
369 | } else { |
---|
370 | throw new SRUConfigException("Element <SupportedLayer> " + |
---|
371 | "does not define a proper layer type"); |
---|
372 | } |
---|
373 | |
---|
374 | String qualifier = getAttribute(item, "qualifier"); |
---|
375 | |
---|
376 | Layer.ContentEncoding encoding = |
---|
377 | Layer.ContentEncoding.VALUE; |
---|
378 | s = getAttribute(item, "type"); |
---|
379 | if (s != null) { |
---|
380 | if (LAYER_ENCODING_VALUE.equals(s)) { |
---|
381 | encoding = Layer.ContentEncoding.VALUE; |
---|
382 | } else if (LAYER_ENCODING_EMPTY.equals(s)) { |
---|
383 | encoding = Layer.ContentEncoding.EMPTY; |
---|
384 | } else { |
---|
385 | throw new SRUConfigException( |
---|
386 | "invalid layer encoding: " + s); |
---|
387 | } |
---|
388 | } |
---|
389 | |
---|
390 | |
---|
391 | String altValueInfo = getAttribute(item, "alt-value-info"); |
---|
392 | URI altValueInfoURI = null; |
---|
393 | if (altValueInfo != null) { |
---|
394 | s = getAttribute(item, "alt-value-info-uri"); |
---|
395 | if (s != null) { |
---|
396 | try { |
---|
397 | altValueInfoURI = new URI(s); |
---|
398 | } catch (URISyntaxException e) { |
---|
399 | throw new SRUConfigException("Attribute " + |
---|
400 | "'alt-value-info-uri' on Element " + |
---|
401 | "<SupportedLayer> is not encoded " + |
---|
402 | "as proper URI: " + s); |
---|
403 | } |
---|
404 | } |
---|
405 | } |
---|
406 | |
---|
407 | if (supportedLayers == null) { |
---|
408 | supportedLayers = new ArrayList<>(list.getLength()); |
---|
409 | } |
---|
410 | supportedLayers.add(new Layer(id, resultId, type, encoding, |
---|
411 | qualifier, altValueInfo, altValueInfoURI)); |
---|
412 | } |
---|
413 | } |
---|
414 | |
---|
415 | if ((supportedLayers != null) && |
---|
416 | !capabilities.contains(CAP_ADVANCED_SEARCH)) { |
---|
417 | logger.warn("Endpoint description has <SupportedLayer> but " + |
---|
418 | "does not indicate support for Advanced Search. " + |
---|
419 | "Please consider adding capability ({}) to " + |
---|
420 | "your endpoint description to make use of layers!", |
---|
421 | CAP_ADVANCED_SEARCH); |
---|
422 | } // necessary |
---|
423 | logger.debug("L: {}", supportedLayers); |
---|
424 | |
---|
425 | // resources |
---|
426 | exp = xpath.compile("/ed:EndpointDescription/ed:Resources/ed:Resource"); |
---|
427 | list = (NodeList) exp.evaluate(doc, XPathConstants.NODESET); |
---|
428 | final Set<String> pids = new HashSet<>(); |
---|
429 | List<ResourceInfo> resources = parseResources(xpath, list, pids, |
---|
430 | supportedDataViews, supportedLayers, version, hasAdvView); |
---|
431 | if ((resources == null) || resources.isEmpty()) { |
---|
432 | throw new SRUConfigException("No resources where " + |
---|
433 | "defined in endpoint description"); |
---|
434 | } |
---|
435 | if (logger.isDebugEnabled()) { |
---|
436 | logger.debug("Dumping ResourceInfo:"); |
---|
437 | dumpResourceInfo(1, resources); |
---|
438 | } |
---|
439 | |
---|
440 | return new SimpleEndpointDescription(version, |
---|
441 | capabilities, |
---|
442 | supportedDataViews, |
---|
443 | supportedLayers, |
---|
444 | resources, |
---|
445 | false); |
---|
446 | } |
---|
447 | |
---|
448 | |
---|
449 | private static void dumpResourceInfo(int depth, List<ResourceInfo> ris) { |
---|
450 | StringBuilder sb = new StringBuilder(); |
---|
451 | for (int i = 0; i < depth; i++) { |
---|
452 | sb.append("--"); |
---|
453 | } |
---|
454 | String pfx = sb.toString(); |
---|
455 | for (ResourceInfo ri : ris) { |
---|
456 | List<ResourceInfo> sris = ri.getSubResources(); |
---|
457 | logger.debug("{} {} (level={})", pfx, ri.getPid(), depth); |
---|
458 | if (sris != null) { |
---|
459 | dumpResourceInfo(depth + 1, sris); |
---|
460 | } |
---|
461 | } |
---|
462 | } |
---|
463 | |
---|
464 | |
---|
465 | private static List<ResourceInfo> parseResources(XPath xpath, |
---|
466 | NodeList nodes, Set<String> pids, List<DataView> supportedDataViews, |
---|
467 | List<Layer> supportedLayers, int version, boolean hasAdv) |
---|
468 | throws SRUConfigException, XPathExpressionException { |
---|
469 | List<ResourceInfo> ris = null; |
---|
470 | for (int k = 0; k < nodes.getLength(); k++) { |
---|
471 | final Element node = (Element) nodes.item(k); |
---|
472 | String pid = null; |
---|
473 | Map<String, String> titles = null; |
---|
474 | Map<String, String> descrs = null; |
---|
475 | String link = null; |
---|
476 | List<String> langs = null; |
---|
477 | List<DataView> availableDataViews = null; |
---|
478 | List<Layer> availableLayers = null; |
---|
479 | List<ResourceInfo> sub = null; |
---|
480 | |
---|
481 | pid = getAttribute(node, "pid"); |
---|
482 | if (pid == null) { |
---|
483 | throw new SRUConfigException("Element <ResourceInfo> " + |
---|
484 | "must have a proper 'pid' attribute"); |
---|
485 | } |
---|
486 | if (pids.contains(pid)) { |
---|
487 | throw new SRUConfigException("Another element <Resource> " + |
---|
488 | "with pid '" + pid + "' already exists"); |
---|
489 | } |
---|
490 | pids.add(pid); |
---|
491 | logger.debug("Processing resource with pid '{}'", pid); |
---|
492 | |
---|
493 | XPathExpression exp = xpath.compile("ed:Title"); |
---|
494 | NodeList list = (NodeList) exp.evaluate(node, |
---|
495 | XPathConstants.NODESET); |
---|
496 | if ((list != null) && (list.getLength() > 0)) { |
---|
497 | for (int i = 0; i < list.getLength(); i++) { |
---|
498 | final Element n = (Element) list.item(i); |
---|
499 | |
---|
500 | final String lang = getLangAttribute(n); |
---|
501 | if (lang == null) { |
---|
502 | throw new SRUConfigException("Element <Title> " + |
---|
503 | "must have a proper 'xml:lang' attribute"); |
---|
504 | } |
---|
505 | |
---|
506 | final String title = cleanString(n.getTextContent()); |
---|
507 | if (title == null) { |
---|
508 | throw new SRUConfigException("Element <Title> " + |
---|
509 | "must have a non-empty 'xml:lang' attribute"); |
---|
510 | } |
---|
511 | |
---|
512 | if (titles == null) { |
---|
513 | titles = new HashMap<>(); |
---|
514 | } |
---|
515 | if (titles.containsKey(lang)) { |
---|
516 | logger.warn("title with language '{}' already exists", |
---|
517 | lang); |
---|
518 | } else { |
---|
519 | logger.debug("title: '{}' '{}'", lang, title); |
---|
520 | titles.put(lang, title); |
---|
521 | } |
---|
522 | } |
---|
523 | if ((titles != null) && !titles.containsKey(LANG_EN)) { |
---|
524 | throw new SRUConfigException( |
---|
525 | "A <Title> with language 'en' is mandatory"); |
---|
526 | } |
---|
527 | } |
---|
528 | |
---|
529 | exp = xpath.compile("ed:Description"); |
---|
530 | list = (NodeList) exp.evaluate(node, XPathConstants.NODESET); |
---|
531 | if ((list != null) && (list.getLength() > 0)) { |
---|
532 | for (int i = 0; i < list.getLength(); i++) { |
---|
533 | Element n = (Element) list.item(i); |
---|
534 | |
---|
535 | String lang = getLangAttribute(n); |
---|
536 | if (lang == null) { |
---|
537 | throw new SRUConfigException("Element <Description> " + |
---|
538 | "must have a proper 'xml:lang' attribute"); |
---|
539 | |
---|
540 | } |
---|
541 | String desc = cleanString(n.getTextContent()); |
---|
542 | |
---|
543 | if (descrs == null) { |
---|
544 | descrs = new HashMap<>(); |
---|
545 | } |
---|
546 | |
---|
547 | if (descrs.containsKey(lang)) { |
---|
548 | logger.warn("description with language '{}' " + |
---|
549 | "already exists", lang); |
---|
550 | } else { |
---|
551 | logger.debug("description: '{}' '{}'", lang, desc); |
---|
552 | descrs.put(lang, desc); |
---|
553 | } |
---|
554 | } |
---|
555 | if ((descrs != null) && !descrs.containsKey(LANG_EN)) { |
---|
556 | throw new SRUConfigException( |
---|
557 | "A <Description> with language 'en' is mandatory"); |
---|
558 | } |
---|
559 | } |
---|
560 | |
---|
561 | exp = xpath.compile("ed:LandingPageURI"); |
---|
562 | list = (NodeList) exp.evaluate(node, XPathConstants.NODESET); |
---|
563 | if ((list != null) && (list.getLength() > 0)) { |
---|
564 | for (int i = 0; i < list.getLength(); i++) { |
---|
565 | Element n = (Element) list.item(i); |
---|
566 | link = cleanString(n.getTextContent()); |
---|
567 | } |
---|
568 | } |
---|
569 | |
---|
570 | exp = xpath.compile("ed:Languages/ed:Language"); |
---|
571 | list = (NodeList) exp.evaluate(node, XPathConstants.NODESET); |
---|
572 | if ((list != null) && (list.getLength() > 0)) { |
---|
573 | for (int i = 0; i < list.getLength(); i++) { |
---|
574 | Element n = (Element) list.item(i); |
---|
575 | |
---|
576 | String s = n.getTextContent(); |
---|
577 | if (s != null) { |
---|
578 | s = s.trim(); |
---|
579 | if (s.isEmpty()) { |
---|
580 | s = null; |
---|
581 | } |
---|
582 | } |
---|
583 | |
---|
584 | /* |
---|
585 | * enforce three letter codes |
---|
586 | */ |
---|
587 | if ((s == null) || (s.length() != 3)) { |
---|
588 | throw new SRUConfigException("Element <Language> " + |
---|
589 | "must use ISO-639-3 three letter " + |
---|
590 | "language codes"); |
---|
591 | } |
---|
592 | |
---|
593 | if (langs == null) { |
---|
594 | langs = new ArrayList<>(); |
---|
595 | } |
---|
596 | langs.add(s); |
---|
597 | } |
---|
598 | } |
---|
599 | |
---|
600 | exp = xpath.compile("ed:AvailableDataViews"); |
---|
601 | Node n = (Node) exp.evaluate(node, XPathConstants.NODE); |
---|
602 | if ((n != null) && (n instanceof Element)) { |
---|
603 | String ref = getAttribute((Element) n, "ref"); |
---|
604 | if (ref == null) { |
---|
605 | throw new SRUConfigException( |
---|
606 | "Element <AvailableDataViews> " + |
---|
607 | "must have a 'ref' attribute"); |
---|
608 | } |
---|
609 | String[] refs = ref.split("\\s+"); |
---|
610 | if ((refs == null) || (refs.length < 1)) { |
---|
611 | throw new SRUConfigException("Attribute 'ref' on element " + |
---|
612 | "<AvailableDataViews> must contain a whitespace " + |
---|
613 | "seperated list of data view references"); |
---|
614 | } |
---|
615 | |
---|
616 | for (String ref2 : refs) { |
---|
617 | DataView dataview = null; |
---|
618 | for (DataView dv : supportedDataViews) { |
---|
619 | if (ref2.equals(dv.getIdentifier())) { |
---|
620 | dataview = dv; |
---|
621 | break; |
---|
622 | } |
---|
623 | } |
---|
624 | if (dataview != null) { |
---|
625 | if (availableDataViews == null) { |
---|
626 | availableDataViews = new ArrayList<>(); |
---|
627 | } |
---|
628 | availableDataViews.add(dataview); |
---|
629 | } else { |
---|
630 | throw new SRUConfigException( |
---|
631 | "A data view with " + "identifier '" + ref2 + |
---|
632 | "' was not defined " + |
---|
633 | "in <SupportedDataViews>"); |
---|
634 | } |
---|
635 | } |
---|
636 | } else { |
---|
637 | throw new SRUConfigException( |
---|
638 | "missing element <AvailableDataViews>"); |
---|
639 | } |
---|
640 | if (availableDataViews == null) { |
---|
641 | throw new SRUConfigException("No available data views where " + |
---|
642 | "defined for resource with PID '" + pid + "'"); |
---|
643 | } |
---|
644 | |
---|
645 | exp = xpath.compile("ed:AvailableLayers"); |
---|
646 | n = (Node) exp.evaluate(node, XPathConstants.NODE); |
---|
647 | if ((n != null) && (n instanceof Element)) { |
---|
648 | String ref = getAttribute((Element) n, "ref"); |
---|
649 | if (ref == null) { |
---|
650 | throw new SRUConfigException("Element <AvailableLayers> " + |
---|
651 | "must have a 'ref' attribute"); |
---|
652 | } |
---|
653 | String[] refs = ref.split("\\s+"); |
---|
654 | if ((refs == null) || (refs.length < 1)) { |
---|
655 | throw new SRUConfigException("Attribute 'ref' on element " + |
---|
656 | "<AvailableLayers> must contain a whitespace " + |
---|
657 | "seperated list of data view references"); |
---|
658 | } |
---|
659 | |
---|
660 | for (String ref2 : refs) { |
---|
661 | Layer layer = null; |
---|
662 | for (Layer l : supportedLayers) { |
---|
663 | if (ref2.equals(l.getId())) { |
---|
664 | layer = l; |
---|
665 | break; |
---|
666 | } |
---|
667 | } |
---|
668 | if (layer != null) { |
---|
669 | if (availableLayers == null) { |
---|
670 | availableLayers = new ArrayList<>(); |
---|
671 | } |
---|
672 | availableLayers.add(layer); |
---|
673 | } else { |
---|
674 | throw new SRUConfigException("A layer with " + |
---|
675 | "identifier '" + ref2 + |
---|
676 | "' was not defined " + "in <SupportedLayers>"); |
---|
677 | } |
---|
678 | } |
---|
679 | } else { |
---|
680 | if (hasAdv) { |
---|
681 | logger.debug("no <SupportedLayers> for ressource '{}'", |
---|
682 | pid); |
---|
683 | } |
---|
684 | } |
---|
685 | |
---|
686 | exp = xpath.compile("ed:Resources/ed:Resource"); |
---|
687 | list = (NodeList) exp.evaluate(node, XPathConstants.NODESET); |
---|
688 | if ((list != null) && (list.getLength() > 0)) { |
---|
689 | sub = parseResources(xpath, list, pids, supportedDataViews, |
---|
690 | supportedLayers, version, hasAdv); |
---|
691 | } |
---|
692 | |
---|
693 | if (ris == null) { |
---|
694 | ris = new ArrayList<>(); |
---|
695 | } |
---|
696 | if ((availableLayers != null) && (version < 1)) { |
---|
697 | logger.warn("Endpoint claims to support FCS 1.0, but " + |
---|
698 | "includes information about <AvailableLayers> for " + |
---|
699 | "resource with pid '{}'", pid); |
---|
700 | } |
---|
701 | ris.add(new ResourceInfo(pid, titles, descrs, link, langs, |
---|
702 | availableDataViews, availableLayers, sub)); |
---|
703 | } |
---|
704 | return ris; |
---|
705 | } |
---|
706 | |
---|
707 | |
---|
708 | private static String getAttribute(Element el, String localName) { |
---|
709 | String value = el.getAttribute(localName); |
---|
710 | if (value != null) { |
---|
711 | value = value.trim(); |
---|
712 | if (!value.isEmpty()) { |
---|
713 | return value; |
---|
714 | } |
---|
715 | } |
---|
716 | return null; |
---|
717 | } |
---|
718 | |
---|
719 | |
---|
720 | private static String getLangAttribute(Element el) { |
---|
721 | String lang = el.getAttributeNS(XMLConstants.XML_NS_URI, "lang"); |
---|
722 | if (lang != null) { |
---|
723 | lang = lang.trim(); |
---|
724 | if (!lang.isEmpty()) { |
---|
725 | return lang; |
---|
726 | } |
---|
727 | } |
---|
728 | return null; |
---|
729 | } |
---|
730 | |
---|
731 | |
---|
732 | private static String cleanString(String s) { |
---|
733 | if (s != null) { |
---|
734 | s = s.trim(); |
---|
735 | if (!s.isEmpty()) { |
---|
736 | StringBuilder sb = new StringBuilder(); |
---|
737 | for (String z : s.split("\\s*\\n+\\s*")) { |
---|
738 | z = z.trim(); |
---|
739 | if (!z.isEmpty()) { |
---|
740 | if (sb.length() > 0) { |
---|
741 | sb.append(' '); |
---|
742 | } |
---|
743 | sb.append(z); |
---|
744 | } |
---|
745 | } |
---|
746 | if (sb.length() > 0) { |
---|
747 | return sb.toString(); |
---|
748 | } |
---|
749 | } |
---|
750 | } |
---|
751 | return null; |
---|
752 | } |
---|
753 | |
---|
754 | |
---|
755 | private static void checkLegacyMode(Document doc, URL url) |
---|
756 | throws SRUConfigException { |
---|
757 | Element root = doc.getDocumentElement(); |
---|
758 | if (root != null) { |
---|
759 | String ns = root.getNamespaceURI(); |
---|
760 | if (ns != null) { |
---|
761 | if (ns.equals(NS_LEGACY)) { |
---|
762 | logger.error("Detected out-dated " + |
---|
763 | "resource info catalog file '" + url + |
---|
764 | "'. Please update to the " + |
---|
765 | "current version"); |
---|
766 | throw new SRUConfigException("unsupport file format: " + ns); |
---|
767 | } else if (!ns.equals(NS)) { |
---|
768 | logger.error("Detected unsupported resource info " + |
---|
769 | "catalog file '" + url + "' with namespace '" + ns + '"'); |
---|
770 | throw new SRUConfigException("unsupport file format: " + ns); |
---|
771 | } |
---|
772 | } else { |
---|
773 | throw new SRUConfigException("No namespace URI was detected " + |
---|
774 | "for resource info catalog file '" + url +"'!"); |
---|
775 | } |
---|
776 | } else { |
---|
777 | throw new SRUConfigException("Error retrieving root element"); |
---|
778 | } |
---|
779 | } |
---|
780 | |
---|
781 | } // class SimpleEndpointDescriptionParser |
---|