1 | package eu.clarin.sru.server.fcs; |
---|
2 | |
---|
3 | import java.util.Collections; |
---|
4 | import java.util.List; |
---|
5 | import java.util.Map; |
---|
6 | |
---|
7 | import javax.servlet.ServletContext; |
---|
8 | import javax.xml.XMLConstants; |
---|
9 | import javax.xml.stream.XMLStreamException; |
---|
10 | import javax.xml.stream.XMLStreamWriter; |
---|
11 | |
---|
12 | import org.slf4j.Logger; |
---|
13 | import org.slf4j.LoggerFactory; |
---|
14 | import org.z3950.zing.cql.CQLNode; |
---|
15 | import org.z3950.zing.cql.CQLRelation; |
---|
16 | import org.z3950.zing.cql.CQLTermNode; |
---|
17 | import org.z3950.zing.cql.Modifier; |
---|
18 | |
---|
19 | import eu.clarin.sru.server.SRUConfigException; |
---|
20 | import eu.clarin.sru.server.SRUConstants; |
---|
21 | import eu.clarin.sru.server.SRUDiagnosticList; |
---|
22 | import eu.clarin.sru.server.SRUException; |
---|
23 | import eu.clarin.sru.server.SRUExplainResult; |
---|
24 | import eu.clarin.sru.server.SRURequest; |
---|
25 | import eu.clarin.sru.server.SRUScanResultSet; |
---|
26 | import eu.clarin.sru.server.SRUSearchEngine; |
---|
27 | import eu.clarin.sru.server.SRUServerConfig; |
---|
28 | import eu.clarin.sru.server.utils.SRUSearchEngineBase; |
---|
29 | |
---|
30 | |
---|
31 | /** |
---|
32 | * A base class for implementing a simple search engine to be used as a CLARIN |
---|
33 | * FCS endpoint. |
---|
34 | * |
---|
35 | */ |
---|
36 | public abstract class SimpleEndpointSearchEngineBase extends |
---|
37 | SRUSearchEngineBase { |
---|
38 | private static final String FCS_RESOURCE_INFO_NS = |
---|
39 | "http://clarin.eu/fcs/1.0/resource-info"; |
---|
40 | private static final String X_CMD_RESOURCE_INFO = "x-cmd-resource-info"; |
---|
41 | private static final String FCS_SCAN_INDEX_FCS_RESOURCE = "fcs.resource"; |
---|
42 | private static final String FCS_SCAN_INDEX_CQL_SERVERCHOICE = "cql.serverChoice"; |
---|
43 | private static final String FCS_SCAN_SUPPORTED_RELATION_CQL_1_1 = "scr"; |
---|
44 | private static final String FCS_SCAN_SUPPORTED_RELATION_CQL_1_2 = "="; |
---|
45 | private static final String FCS_SUPPORTED_RELATION_EXACT = "exact"; |
---|
46 | private static final Logger logger = |
---|
47 | LoggerFactory.getLogger(SimpleEndpointSearchEngineBase.class); |
---|
48 | protected ResourceInfoInventory resourceInfoInventory; |
---|
49 | |
---|
50 | |
---|
51 | /** |
---|
52 | * This method should not be overridden. Perform your custom initialization |
---|
53 | * in the {@link #doInit(ServletContext, SRUServerConfig, Map)} method |
---|
54 | * Instead. |
---|
55 | * |
---|
56 | * @see #doInit(ServletContext, SRUServerConfig, Map) |
---|
57 | */ |
---|
58 | @Override |
---|
59 | public final void init(ServletContext context, SRUServerConfig config, |
---|
60 | Map<String, String> params) throws SRUConfigException { |
---|
61 | logger.debug("initializing"); |
---|
62 | super.init(context, config, params); |
---|
63 | |
---|
64 | logger.debug("initializing search engine implementation"); |
---|
65 | doInit(context, config, params); |
---|
66 | |
---|
67 | logger.debug("initizalizing resource info inventory"); |
---|
68 | this.resourceInfoInventory = createResourceInfoInventory(context, config, params); |
---|
69 | if (this.resourceInfoInventory == null) { |
---|
70 | logger.error("ClarinFCSSearchEngineBase implementation error: " + |
---|
71 | "initResourceCatalog() returned null"); |
---|
72 | throw new SRUConfigException("initResourceCatalog() returned no " + |
---|
73 | "valid implementation of a ResourceCatalog"); |
---|
74 | } |
---|
75 | } |
---|
76 | |
---|
77 | |
---|
78 | /** |
---|
79 | * This method should not be overridden. Perform you custom cleanup in the |
---|
80 | * {@link #doDestroy()} method. |
---|
81 | * |
---|
82 | * @see #doDestroy() |
---|
83 | */ |
---|
84 | @Override |
---|
85 | public final void destroy() { |
---|
86 | logger.debug("performing cleanup of resource info inventory"); |
---|
87 | resourceInfoInventory.destroy(); |
---|
88 | logger.debug("performing cleanup of search engine"); |
---|
89 | doDestroy(); |
---|
90 | super.destroy(); |
---|
91 | } |
---|
92 | |
---|
93 | |
---|
94 | @Override |
---|
95 | public final SRUExplainResult explain(SRUServerConfig config, |
---|
96 | SRURequest request, SRUDiagnosticList diagnostics) |
---|
97 | throws SRUException { |
---|
98 | final boolean provideResourceInfo = |
---|
99 | parseBoolean(request.getExtraRequestData(X_CMD_RESOURCE_INFO)); |
---|
100 | if (provideResourceInfo) { |
---|
101 | final List<ResourceInfo> resourceInfoList = |
---|
102 | resourceInfoInventory.getResourceInfoList( |
---|
103 | ResourceInfoInventory.PID_ROOT); |
---|
104 | return new SRUExplainResult(diagnostics) { |
---|
105 | |
---|
106 | @Override |
---|
107 | public boolean hasExtraResponseData() { |
---|
108 | return provideResourceInfo; |
---|
109 | } |
---|
110 | |
---|
111 | @Override |
---|
112 | public void writeExtraResponseData(XMLStreamWriter writer) |
---|
113 | throws XMLStreamException { |
---|
114 | writeFullResourceInfo(writer, null, resourceInfoList); |
---|
115 | } |
---|
116 | }; |
---|
117 | } else { |
---|
118 | return null; |
---|
119 | } |
---|
120 | } |
---|
121 | |
---|
122 | |
---|
123 | /** |
---|
124 | * Handle a <em>scan</em> operation. This implementation provides support to |
---|
125 | * CLARIN FCS resource enumeration. If you want to provide custom scan |
---|
126 | * behavior for a different index, override the |
---|
127 | * {@link #doScan(SRUServerConfig, SRURequest, SRUDiagnosticList)} method. |
---|
128 | * |
---|
129 | * @see #doScan(SRUServerConfig, SRURequest, SRUDiagnosticList) |
---|
130 | */ |
---|
131 | @Override |
---|
132 | public final SRUScanResultSet scan(SRUServerConfig config, |
---|
133 | SRURequest request, SRUDiagnosticList diagnostics) |
---|
134 | throws SRUException { |
---|
135 | /* |
---|
136 | * Check if we got a scan on fcs.resource. If yes, handle it |
---|
137 | * accordingly, otherwise delegate to user-provided implementation. |
---|
138 | */ |
---|
139 | final List<ResourceInfo> result = |
---|
140 | translateFcsScanResource(request.getScanClause()); |
---|
141 | if (result != null) { |
---|
142 | /* |
---|
143 | * Make sure, we honor the maximumTerms limit, of the client |
---|
144 | * requests it ... |
---|
145 | */ |
---|
146 | final int maxTerms |
---|
147 | = ((result.size() > 0) && (request.getMaximumTerms() > 0)) |
---|
148 | ? Math.min(result.size(), request.getMaximumTerms()) |
---|
149 | : result.size(); |
---|
150 | |
---|
151 | /* |
---|
152 | * Shall we provide extended resource information ... ? |
---|
153 | */ |
---|
154 | final boolean provideResourceInfo = parseBoolean( |
---|
155 | request.getExtraRequestData(X_CMD_RESOURCE_INFO)); |
---|
156 | |
---|
157 | return new SRUScanResultSet(diagnostics) { |
---|
158 | private int idx = -1; |
---|
159 | |
---|
160 | @Override |
---|
161 | public boolean nextTerm() { |
---|
162 | return (result != null) && (++idx < maxTerms); |
---|
163 | } |
---|
164 | |
---|
165 | |
---|
166 | @Override |
---|
167 | public String getValue() { |
---|
168 | return result.get(idx).getPid(); |
---|
169 | } |
---|
170 | |
---|
171 | |
---|
172 | @Override |
---|
173 | public int getNumberOfRecords() { |
---|
174 | return result.get(idx).getResourceCount(); |
---|
175 | } |
---|
176 | |
---|
177 | |
---|
178 | @Override |
---|
179 | public String getDisplayTerm() { |
---|
180 | return result.get(idx).getTitle("en"); |
---|
181 | } |
---|
182 | |
---|
183 | |
---|
184 | @Override |
---|
185 | public WhereInList getWhereInList() { |
---|
186 | return null; |
---|
187 | } |
---|
188 | |
---|
189 | |
---|
190 | @Override |
---|
191 | public boolean hasExtraTermData() { |
---|
192 | return provideResourceInfo; |
---|
193 | } |
---|
194 | |
---|
195 | |
---|
196 | @Override |
---|
197 | public void writeExtraTermData(XMLStreamWriter writer) |
---|
198 | throws XMLStreamException { |
---|
199 | if (provideResourceInfo) { |
---|
200 | writeResourceInfo(writer, null, result.get(idx)); |
---|
201 | } |
---|
202 | } |
---|
203 | }; |
---|
204 | } else { |
---|
205 | return doScan(config, request, diagnostics); |
---|
206 | } |
---|
207 | } |
---|
208 | |
---|
209 | |
---|
210 | |
---|
211 | /** |
---|
212 | * Create the resource info inventory to be used with this endpoint. |
---|
213 | * Implement this method to provide an implementation of a |
---|
214 | * {@link ResourceInfoInventory} that is tailored towards your environment |
---|
215 | * and needs. |
---|
216 | * |
---|
217 | * @param context |
---|
218 | * the {@link ServletContext} for the Servlet |
---|
219 | * @param config |
---|
220 | * the {@link SRUServerConfig} object for this search engine |
---|
221 | * @param params |
---|
222 | * additional parameters gathered from the Servlet configuration |
---|
223 | * and Servlet context. |
---|
224 | * @return an instance of a {@link ResourceInfoInventory} used by this |
---|
225 | * search engine |
---|
226 | * @throws SRUConfigException |
---|
227 | * if an error occurred |
---|
228 | */ |
---|
229 | protected abstract ResourceInfoInventory createResourceInfoInventory( |
---|
230 | ServletContext context, SRUServerConfig config, |
---|
231 | Map<String, String> params) throws SRUConfigException; |
---|
232 | |
---|
233 | |
---|
234 | /** |
---|
235 | * Initialize the search engine. This initialization should be tailed |
---|
236 | * towards your environment and needs. |
---|
237 | * |
---|
238 | * @param context |
---|
239 | * the {@link ServletContext} for the Servlet |
---|
240 | * @param config |
---|
241 | * the {@link SRUServerConfig} object for this search engine |
---|
242 | * @param params |
---|
243 | * additional parameters gathered from the Servlet configuration |
---|
244 | * and Servlet context. |
---|
245 | * @throws SRUConfigException |
---|
246 | * if an error occurred |
---|
247 | */ |
---|
248 | protected abstract void doInit(ServletContext context, |
---|
249 | SRUServerConfig config, Map<String, String> params) |
---|
250 | throws SRUConfigException; |
---|
251 | |
---|
252 | |
---|
253 | /** |
---|
254 | * Destroy the search engine. Override this method for any cleanup the |
---|
255 | * search engine needs to perform upon termination. |
---|
256 | */ |
---|
257 | protected void doDestroy() { |
---|
258 | } |
---|
259 | |
---|
260 | |
---|
261 | /** |
---|
262 | * Handle a <em>explain</em> operation. The default implementation is a |
---|
263 | * no-op. Override this method, if you want to provide a custom behavior. |
---|
264 | * |
---|
265 | * @see SRUSearchEngine#explain(SRUServerConfig, SRURequest, |
---|
266 | * SRUDiagnosticList) |
---|
267 | */ |
---|
268 | protected SRUScanResultSet doScan(SRUServerConfig config, |
---|
269 | SRURequest request, SRUDiagnosticList diagnostics) |
---|
270 | throws SRUException { |
---|
271 | final CQLNode scanClause = request.getScanClause(); |
---|
272 | if (scanClause instanceof CQLTermNode) { |
---|
273 | final CQLTermNode root = (CQLTermNode) scanClause; |
---|
274 | final String index = root.getIndex(); |
---|
275 | throw new SRUException(SRUConstants.SRU_UNSUPPORTED_INDEX, index, |
---|
276 | "scan operation on index '" + index + "' is not supported"); |
---|
277 | } else { |
---|
278 | throw new SRUException(SRUConstants.SRU_QUERY_FEATURE_UNSUPPORTED, |
---|
279 | "Scan clause too complex."); |
---|
280 | } |
---|
281 | } |
---|
282 | |
---|
283 | |
---|
284 | /** |
---|
285 | * Convince method for parsing a string to boolean. Values <code>1</code>, |
---|
286 | * <code>true</code>, <code>yes</code> yield a <em>true</em> boolean value |
---|
287 | * as a result, all others (including <code>null</code>) a <em>false</em> |
---|
288 | * boolean value. |
---|
289 | * |
---|
290 | * @param value |
---|
291 | * the string to parse |
---|
292 | * @return <code>true</code> if the supplied string was considered something |
---|
293 | * representing a <em>true</em> boolean value, <code>false</code> |
---|
294 | * otherwise |
---|
295 | */ |
---|
296 | protected static boolean parseBoolean(String value) { |
---|
297 | if (value != null) { |
---|
298 | return value.equals("1") || Boolean.parseBoolean(value); |
---|
299 | } |
---|
300 | return false; |
---|
301 | } |
---|
302 | |
---|
303 | |
---|
304 | private List<ResourceInfo> translateFcsScanResource(CQLNode scanClause) |
---|
305 | throws SRUException { |
---|
306 | if (scanClause instanceof CQLTermNode) { |
---|
307 | final CQLTermNode root = (CQLTermNode) scanClause; |
---|
308 | logger.debug("index = '{}', relation = '{}', term = '{}'", |
---|
309 | new Object[] { root.getIndex(), |
---|
310 | root.getRelation().getBase(), root.getTerm() }); |
---|
311 | |
---|
312 | String index = root.getIndex(); |
---|
313 | if (FCS_SCAN_INDEX_CQL_SERVERCHOICE.equals(index) && |
---|
314 | FCS_SCAN_INDEX_FCS_RESOURCE.equals(root.getTerm())) { |
---|
315 | throw new SRUException(SRUConstants.SRU_UNSUPPORTED_INDEX, |
---|
316 | "scan operation with 'scanClause' with value " + |
---|
317 | "'fcs.resource' is deprecated within CLARIN-FCS"); |
---|
318 | } |
---|
319 | if (!(FCS_SCAN_INDEX_FCS_RESOURCE.equals(index))) { |
---|
320 | logger.debug("got scan operation on index '{}', bailing ...", |
---|
321 | index); |
---|
322 | return null; |
---|
323 | } |
---|
324 | |
---|
325 | |
---|
326 | // only allow "=" relation without any modifiers |
---|
327 | final CQLRelation relationNode = root.getRelation(); |
---|
328 | String relation = relationNode.getBase(); |
---|
329 | if (!(FCS_SCAN_SUPPORTED_RELATION_CQL_1_1.equals(relation) || |
---|
330 | FCS_SCAN_SUPPORTED_RELATION_CQL_1_2.equals(relation) || |
---|
331 | FCS_SUPPORTED_RELATION_EXACT.equals(relation))) { |
---|
332 | throw new SRUException(SRUConstants.SRU_UNSUPPORTED_RELATION, |
---|
333 | relationNode.getBase(), "Relation \"" + |
---|
334 | relationNode.getBase() + |
---|
335 | "\" is not supported in scan operation."); |
---|
336 | } |
---|
337 | final List<Modifier> modifiers = relationNode.getModifiers(); |
---|
338 | if ((modifiers != null) && !modifiers.isEmpty()) { |
---|
339 | Modifier modifier = modifiers.get(0); |
---|
340 | throw new SRUException( |
---|
341 | SRUConstants.SRU_UNSUPPORTED_RELATION_MODIFIER, |
---|
342 | modifier.getValue(), "Relation modifier \"" + |
---|
343 | modifier.getValue() + |
---|
344 | "\" is not supported in scan operation."); |
---|
345 | } |
---|
346 | |
---|
347 | final String term = root.getTerm(); |
---|
348 | if ((term == null) || term.isEmpty()) { |
---|
349 | throw new SRUException(SRUConstants.SRU_EMPTY_TERM_UNSUPPORTED, |
---|
350 | "An empty term is not supported in scan operation."); |
---|
351 | } |
---|
352 | |
---|
353 | /* |
---|
354 | * generate result: currently we only have a flat hierarchy, so |
---|
355 | * return an empty result on any attempt to do a recursive scan ... |
---|
356 | */ |
---|
357 | List<ResourceInfo> results = null; |
---|
358 | if ((FCS_SCAN_INDEX_CQL_SERVERCHOICE.equals(index) && |
---|
359 | FCS_SCAN_INDEX_FCS_RESOURCE.equals(term)) || |
---|
360 | (FCS_SCAN_INDEX_FCS_RESOURCE.equals(index))) { |
---|
361 | results = resourceInfoInventory.getResourceInfoList(term); |
---|
362 | } |
---|
363 | if ((results == null) || results.isEmpty()) { |
---|
364 | return Collections.emptyList(); |
---|
365 | } else { |
---|
366 | return results; |
---|
367 | } |
---|
368 | } else { |
---|
369 | throw new SRUException(SRUConstants.SRU_QUERY_FEATURE_UNSUPPORTED, |
---|
370 | "Scan clause too complex."); |
---|
371 | } |
---|
372 | } |
---|
373 | |
---|
374 | |
---|
375 | private static void writeFullResourceInfo(XMLStreamWriter writer, |
---|
376 | String prefix, List<ResourceInfo> resourceInfoList) |
---|
377 | throws XMLStreamException { |
---|
378 | if (resourceInfoList == null) { |
---|
379 | throw new NullPointerException("resourceInfoList == null"); |
---|
380 | } |
---|
381 | if (!resourceInfoList.isEmpty()) { |
---|
382 | final boolean defaultNS = ((prefix == null) || prefix.isEmpty()); |
---|
383 | if (defaultNS) { |
---|
384 | writer.setDefaultNamespace(FCS_RESOURCE_INFO_NS); |
---|
385 | } else { |
---|
386 | writer.setPrefix(prefix, FCS_RESOURCE_INFO_NS); |
---|
387 | } |
---|
388 | writer.writeStartElement(FCS_RESOURCE_INFO_NS, "ResourceCollection"); |
---|
389 | if (defaultNS) { |
---|
390 | writer.writeDefaultNamespace(FCS_RESOURCE_INFO_NS); |
---|
391 | } else { |
---|
392 | writer.writeNamespace(prefix, FCS_RESOURCE_INFO_NS); |
---|
393 | } |
---|
394 | for (ResourceInfo resourceInfo : resourceInfoList) { |
---|
395 | doWriteResourceInfo(writer, prefix, resourceInfo, false, true); |
---|
396 | } |
---|
397 | writer.writeEndElement(); // "ResourceCollection" element |
---|
398 | } |
---|
399 | } |
---|
400 | |
---|
401 | |
---|
402 | private static void writeResourceInfo(XMLStreamWriter writer, String prefix, |
---|
403 | ResourceInfo resourceInfo) throws XMLStreamException { |
---|
404 | if (resourceInfo == null) { |
---|
405 | throw new NullPointerException("resourceInfo == null"); |
---|
406 | } |
---|
407 | doWriteResourceInfo(writer, prefix, resourceInfo, true, false); |
---|
408 | } |
---|
409 | |
---|
410 | |
---|
411 | private static void doWriteResourceInfo(XMLStreamWriter writer, |
---|
412 | String prefix, ResourceInfo resourceInfo, boolean writeNS, |
---|
413 | boolean recursive) throws XMLStreamException { |
---|
414 | final boolean defaultNS = ((prefix == null) || prefix.isEmpty()); |
---|
415 | if (writeNS) { |
---|
416 | if (defaultNS) { |
---|
417 | writer.setDefaultNamespace(FCS_RESOURCE_INFO_NS); |
---|
418 | } else { |
---|
419 | writer.setPrefix(prefix, FCS_RESOURCE_INFO_NS); |
---|
420 | } |
---|
421 | } |
---|
422 | writer.writeStartElement(FCS_RESOURCE_INFO_NS, "ResourceInfo"); |
---|
423 | if (writeNS) { |
---|
424 | if (defaultNS) { |
---|
425 | writer.writeDefaultNamespace(FCS_RESOURCE_INFO_NS); |
---|
426 | } else { |
---|
427 | writer.writeNamespace(prefix, FCS_RESOURCE_INFO_NS); |
---|
428 | } |
---|
429 | } |
---|
430 | if (recursive) { |
---|
431 | /* |
---|
432 | * HACK: only output @pid for recursive (= explain) requests. |
---|
433 | * This should be revisited, if we decide to go for the explain |
---|
434 | * style enumeration of resources. |
---|
435 | */ |
---|
436 | writer.writeAttribute("pid", resourceInfo.getPid()); |
---|
437 | } |
---|
438 | if (resourceInfo.hasSubResources()) { |
---|
439 | writer.writeAttribute("hasSubResources", "true"); |
---|
440 | } |
---|
441 | |
---|
442 | final Map<String, String> title = resourceInfo.getTitle(); |
---|
443 | for (Map.Entry<String, String> i : title.entrySet()) { |
---|
444 | writer.setPrefix(XMLConstants.XML_NS_PREFIX, |
---|
445 | XMLConstants.XML_NS_URI); |
---|
446 | writer.writeStartElement(FCS_RESOURCE_INFO_NS, "Title"); |
---|
447 | writer.writeAttribute(XMLConstants.XML_NS_URI, "lang", i.getKey()); |
---|
448 | writer.writeCharacters(i.getValue()); |
---|
449 | writer.writeEndElement(); // "title" element |
---|
450 | } |
---|
451 | |
---|
452 | final Map<String, String> description = resourceInfo.getDescription(); |
---|
453 | if (description != null) { |
---|
454 | for (Map.Entry<String, String> i : description.entrySet()) { |
---|
455 | writer.writeStartElement(FCS_RESOURCE_INFO_NS, "Description"); |
---|
456 | writer.writeAttribute(XMLConstants.XML_NS_URI, "lang", |
---|
457 | i.getKey()); |
---|
458 | writer.writeCharacters(i.getValue()); |
---|
459 | writer.writeEndElement(); // "Description" element |
---|
460 | } |
---|
461 | } |
---|
462 | |
---|
463 | final String landingPageURI = resourceInfo.getLandingPageURI(); |
---|
464 | if (landingPageURI != null) { |
---|
465 | writer.writeStartElement(FCS_RESOURCE_INFO_NS, "LandingPageURI"); |
---|
466 | writer.writeCharacters(landingPageURI); |
---|
467 | writer.writeEndElement(); // "LandingPageURI" element |
---|
468 | } |
---|
469 | |
---|
470 | final List<String> languages = resourceInfo.getLanguages(); |
---|
471 | writer.writeStartElement(FCS_RESOURCE_INFO_NS, "Languages"); |
---|
472 | for (String i : languages) { |
---|
473 | writer.writeStartElement(FCS_RESOURCE_INFO_NS, "Language"); |
---|
474 | writer.writeCharacters(i); |
---|
475 | writer.writeEndElement(); // "Language" element |
---|
476 | |
---|
477 | } |
---|
478 | writer.writeEndElement(); // "Languages" element |
---|
479 | |
---|
480 | if (recursive && resourceInfo.hasSubResources()) { |
---|
481 | writer.writeStartElement(FCS_RESOURCE_INFO_NS, |
---|
482 | "ResourceInfoCollection"); |
---|
483 | for (ResourceInfo r : resourceInfo.getSubResources()) { |
---|
484 | doWriteResourceInfo(writer, prefix, r, writeNS, recursive); |
---|
485 | } |
---|
486 | writer.writeEndElement(); // "ResourceCollection" element |
---|
487 | } |
---|
488 | writer.writeEndElement(); // "ResourceInfo" element |
---|
489 | } |
---|
490 | |
---|
491 | } // class SimpleEndpointSearchEngineBase |
---|