Changeset 2760 for SRUCQIBridge
- Timestamp:
- 03/28/13 10:52:25 (11 years ago)
- Location:
- SRUCQIBridge
- Files:
-
- 1 added
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
SRUCQIBridge/pom.xml
r2723 r2760 3 3 <modelVersion>4.0.0</modelVersion> 4 4 <artifactId>sru-cqibridge</artifactId> 5 <version>1. 1</version>5 <version>1.2</version> 6 6 <packaging>war</packaging> 7 7 … … 25 25 <groupId>eu.clarin.sru</groupId> 26 26 <artifactId>sru-server</artifactId> 27 <version>1. 4.2</version>27 <version>1.5.0</version> 28 28 </dependency> 29 29 <dependency> … … 36 36 <artifactId>slf4j-simple</artifactId> 37 37 <version>1.6.4</version> 38 </dependency> 39 <dependency> 40 <groupId>eu.clarin.sru.fcs</groupId> 41 <artifactId>fcs-simple-endpoint</artifactId> 42 <version>1.0.0</version> 43 </dependency> 44 <dependency> 45 <groupId>javax.servlet</groupId> 46 <artifactId>servlet-api</artifactId> 47 <version>2.5</version> 38 48 </dependency> 39 49 </dependencies> -
SRUCQIBridge/src/main/java/eu/clarin/sru/cqibridge/CqiSRUSearchEngine.java
r2479 r2760 20 20 import eu.clarin.cqi.client.CqiResult; 21 21 import eu.clarin.sru.server.*; 22 import eu.clarin.sru.server.utils.SRUSearchEngineBase; 23 import java.util.Arrays; 22 import eu.clarin.sru.server.fcs.ResourceInfoInventory; 23 import eu.clarin.sru.server.fcs.SimpleEndpointSearchEngineBase; 24 import eu.clarin.sru.server.fcs.XMLStreamWriterHelper; 25 import eu.clarin.sru.server.fcs.utils.SimpleResourceInfoInventoryParser; 26 import java.net.MalformedURLException; 27 import java.util.List; 24 28 import java.util.Map; 25 29 import java.util.NoSuchElementException; 26 import java.util.Vector;27 30 import java.util.regex.Pattern; 31 import javax.servlet.ServletContext; 28 32 import javax.xml.stream.XMLStreamException; 29 33 import javax.xml.stream.XMLStreamWriter; … … 39 43 * @author akislev 40 44 */ 41 public class CqiSRUSearchEngine extends S RUSearchEngineBase {45 public class CqiSRUSearchEngine extends SimpleEndpointSearchEngineBase { 42 46 43 47 private static final String PARAM_CQI_SERVER_HOST = "cqi.serverHost"; … … 48 52 private static final String PARAM_CQI_DEFAULT_CORPUS_PID = "cqi.defaultCorpusPID"; 49 53 private static final String PARAM_CQI_DEFAULT_CORPUS_REF = "cqi.defaultCorpusRef"; 54 private static final String RESOURCE_INFO_INVENTORY_URL = 55 "/WEB-INF/resource-info.xml"; 50 56 private static final String CQI_SUPPORTED_RELATION_CQL_1_1 = "scr"; 51 57 private static final String CQI_SUPPORTED_RELATION_CQL_1_2 = "="; … … 53 59 private static final String INDEX_CQL_SERVERCHOICE = "cql.serverChoice"; 54 60 private static final String INDEX_FCS_WORDS = "words"; 55 private static final String INDEX_FCS_RESOURCE = "fcs.resource"; 56 private static final String FCS_RESOURCE_TERM_WILDCARD = "*"; 57 private static final String FCS_RESOURCE_TERM_ANY = "any"; 58 private static final String FCS_NS = "http://clarin.eu/fcs/1.0"; 59 private static final String FCS_PREFIX = "fcs"; 60 private static final String FCS_KWIC_NS = 
"http://clarin.eu/fcs/1.0/kwic"; 61 private static final String FCS_KWIC_PREFIX = "kwic"; 62 private static final String CLARIN_FCS_RECORD_SCHEMA = FCS_NS; 63 private static final String X_CMD_RESOURCE_INFO = "x-cmd-resource-info"; 61 private static final String CLARIN_FCS_RECORD_SCHEMA = 62 "http://clarin.eu/fcs/1.0"; 64 63 private static final Pattern SPACE_PATTERN = Pattern.compile("\\s+"); 65 64 private static final String WORD_POSITIONAL_ATTRIBUTE = "word"; … … 67 66 private static final Logger logger = 68 67 LoggerFactory.getLogger(CqiSRUSearchEngine.class); 69 private static final ResourceInfo[] RESOURCE_INFOS = new ResourceInfo[]{70 new ResourceInfo("tueba-ddc", -1, false,71 Arrays.asList("en", "TuebaDDC",72 "de", "TübaDDC"),73 Arrays.asList("en", "Tübingen Treebank of Written German - Diachronic Corpus.",74 "de", "Tübingen Baumbank des Deutschen - Diachrones Corpus."),75 Arrays.asList("deu"),76 Arrays.asList("text", "fcs.words")),};77 68 private CqiClient client; 78 69 private String defaultCorpusName; … … 81 72 82 73 @Override 83 public void init(SRUServerConfig config, Map<String, String> params) 84 throws SRUConfigException { 74 protected void doInit(ServletContext context, SRUServerConfig config, 75 Map<String, String> params) throws SRUConfigException { 76 77 /* 78 * Perform search engine specific initialization in this method, e.g. 79 * set up a database connection, etc. 
80 */ 85 81 final String serverHost = params.get(PARAM_CQI_SERVER_HOST); 86 82 if (serverHost == null) { … … 124 120 client = new CqiClient(serverHost, serverPort); 125 121 } catch (CqiClientException ex) { 126 throw new SRUConfigException("can't initialize acqi client", ex);122 throw new SRUConfigException("can't initialize the cqi client", ex); 127 123 } 128 124 try { … … 134 130 135 131 @Override 136 public SRUExplainResult explain(SRUServerConfig config, 137 SRURequest request, SRUDiagnosticList diagnostics) 138 throws SRUException { 139 return null; 140 } 141 142 @Override 143 public SRUScanResultSet scan(SRUServerConfig config, SRURequest request, 144 SRUDiagnosticList diagnostics) throws SRUException { 132 protected ResourceInfoInventory createResourceInfoInventory( 133 ServletContext context, SRUServerConfig config, 134 Map<String, String> params) throws SRUConfigException { 145 135 /* 146 * handle scan on CLARIN FCS fcs.resource; 147 * otherwise return an empty scan result set ... 136 * Create a new instance of a class that implements the 137 * ResourceInfoInventory interface and return it. 
The resource info 138 * inventory is used for endpoint resource enumeration (see CLARIN FCS 139 * specification) 148 140 */ 149 final ResourceInfo[] result = 150 translateFcsScanResource(request.getScanClause()); 151 final boolean provideResourceInfo = (result != null) 152 && parseBoolean(request.getExtraRequestData(X_CMD_RESOURCE_INFO)); 153 return new SRUScanResultSet(diagnostics) { 154 private int idx = -1; 155 156 @Override 157 public boolean nextTerm() { 158 return (result != null) && (++idx < result.length); 159 } 160 161 @Override 162 public String getValue() { 163 return result[idx].getCorpusId(); 164 } 165 166 @Override 167 public int getNumberOfRecords() { 168 return result[idx].getResourceCount(); 169 } 170 171 @Override 172 public String getDisplayTerm() { 173 return null; 174 } 175 176 @Override 177 public SRUScanResultSet.WhereInList getWhereInList() { 178 return null; 179 } 180 181 @Override 182 public boolean hasExtraTermData() { 183 return provideResourceInfo; 184 } 185 186 @Override 187 public void writeExtraTermData(XMLStreamWriter writer) 188 throws XMLStreamException { 189 if (provideResourceInfo) { 190 result[idx].writeResourceInfo(writer, null); 191 } 192 } 193 }; 141 try { 142 return SimpleResourceInfoInventoryParser.parse( 143 context.getResource(RESOURCE_INFO_INVENTORY_URL)); 144 } catch (MalformedURLException e) { 145 throw new SRUConfigException( 146 "error initializing resource info inventory", e); 147 } 148 194 149 } 195 150 … … 260 215 public boolean nextRecord() { 261 216 try { 262 return pos++< maximumRecords && result.next();217 return ++pos < maximumRecords && result.next(); 263 218 } catch (CqiClientException e) { 264 219 throw new NoSuchElementException(e.getMessage()); … … 276 231 final int contextStart = result.getContextStart(); 277 232 final int contextEnd = result.getContextEnd(); 278 final int matchStart = result.getMatchStart(); 279 final int matchEnd = result.getMatchEnd(); 280 final int relMatchStart = matchStart - 
contextStart; 281 final int relMatchEnd = matchEnd - contextStart + 1; 233 final int relMatchStart = result.getMatchStart() - contextStart; 234 final int relMatchEnd = result.getMatchEnd() - contextStart + 1; 282 235 final int relContextEnd = contextEnd - contextStart + 1; 283 final StringBuilder leftContext = new StringBuilder();284 final StringBuilder keyWord = new StringBuilder();285 final StringBuilder rightContext = new StringBuilder();286 236 String[] words; 287 237 try { … … 290 240 throw new XMLStreamException("can't obtain the values of the positional attribute '" + WORD_POSITIONAL_ATTRIBUTE + "'", e); 291 241 } 292 boolean isFirst = true; 293 for (int i = 0; i < relMatchStart; i++) { 294 if (isFirst) { 295 isFirst = false; 296 } else { 297 leftContext.append(' '); 298 } 299 leftContext.append(words[i]); 300 } 301 isFirst = true; 302 for (int i = relMatchStart; i < relMatchEnd; i++) { 303 if (isFirst) { 304 isFirst = false; 305 } else { 306 keyWord.append(' '); 307 } 308 keyWord.append(words[i]); 309 } 310 isFirst = true; 311 for (int i = relMatchEnd; i < relContextEnd; i++) { 312 if (isFirst) { 313 isFirst = false; 314 } else { 315 rightContext.append(' '); 316 } 317 rightContext.append(words[i]); 318 } 319 writer.setPrefix(FCS_PREFIX, FCS_NS); 320 writer.writeStartElement(FCS_NS, "Resource"); 321 writer.writeNamespace(FCS_PREFIX, FCS_NS); 322 writer.writeAttribute("pid", defaultCorpusPID); 323 writer.writeAttribute("ref", defaultCorpusRef); 324 writer.writeStartElement(FCS_NS, "DataView"); 325 writer.writeAttribute("type", "kwic"); 326 327 writer.setPrefix(FCS_KWIC_PREFIX, FCS_KWIC_NS); 328 writer.writeStartElement(FCS_KWIC_NS, "kwic"); 329 writer.writeNamespace(FCS_KWIC_PREFIX, FCS_KWIC_NS); 330 331 writer.writeStartElement(FCS_KWIC_NS, "c"); 332 writer.writeAttribute("type", "left"); 333 writer.writeCharacters(leftContext.toString()); 334 writer.writeEndElement(); // "c" element 335 336 writer.writeStartElement(FCS_KWIC_NS, "kw"); 337 
writer.writeCharacters(keyWord.toString()); 338 writer.writeEndElement(); // "kw" element 339 340 writer.writeStartElement(FCS_KWIC_NS, "c"); 341 writer.writeAttribute("type", "right"); 342 writer.writeCharacters(rightContext.toString()); 343 writer.writeEndElement(); // "c" element 344 345 writer.writeEndElement(); // "kwic" element 346 347 writer.writeEndElement(); // "DataView" element 348 writer.writeEndElement(); // "Resource" element 242 String leftContext = matchToString(words, 0, relMatchStart); 243 String keyWord = matchToString(words, relMatchStart, relMatchEnd); 244 String rightContext = matchToString(words, relMatchEnd, relContextEnd); 245 XMLStreamWriterHelper.writeResourceWithKWICDataView(writer, 246 defaultCorpusPID, defaultCorpusRef, 247 leftContext, keyWord, 248 rightContext); 349 249 } 350 250 }; … … 357 257 } 358 258 359 private ResourceInfo[] translateFcsScanResource(CQLNode query) 360 throws SRUException { 361 if (query instanceof CQLTermNode) { 362 final CQLTermNode root = (CQLTermNode) query; 363 logger.debug("index = '{}', relation = '{}', term = '{}'", 364 new Object[]{root.getIndex(), 365 root.getRelation().getBase(), root.getTerm()}); 366 367 String index = root.getIndex(); 368 if (!(INDEX_FCS_RESOURCE.equals(index) || INDEX_CQL_SERVERCHOICE.equals(index))) { 369 throw new SRUException(SRUConstants.SRU_UNSUPPORTED_INDEX, 370 root.getIndex(), "Index \"" + root.getIndex() 371 + "\" is not supported in scan operation."); 372 } 373 374 375 // only allow "=" relation without any modifiers 376 final CQLRelation relationNode = root.getRelation(); 377 String relation = relationNode.getBase(); 378 if (!(CQI_SUPPORTED_RELATION_CQL_1_1.equals(relation) 379 || CQI_SUPPORTED_RELATION_CQL_1_2.equals(relation) 380 || CQI_SUPPORTED_RELATION_EXACT.equals(relation))) { 381 throw new SRUException(SRUConstants.SRU_UNSUPPORTED_RELATION, 382 relationNode.getBase(), "Relation \"" 383 + relationNode.getBase() 384 + "\" is not supported in scan operation."); 385 
} 386 Vector<Modifier> modifiers = relationNode.getModifiers(); 387 if ((modifiers != null) && !modifiers.isEmpty()) { 388 Modifier modifier = modifiers.get(0); 389 throw new SRUException( 390 SRUConstants.SRU_UNSUPPORTED_RELATION_MODIFIER, 391 modifier.getValue(), "Relation modifier \"" 392 + modifier.getValue() 393 + "\" is not supported in scan operation."); 394 } 395 396 String term = root.getTerm(); 397 if ((term == null) || term.isEmpty()) { 398 throw new SRUException(SRUConstants.SRU_EMPTY_TERM_UNSUPPORTED, 399 "An empty term is not supported in scan operation."); 400 } 401 402 /* 403 * generate result: currently we only have a flat hierarchy, so 404 * return an empty result on any attempt to do a recursive scan ... 405 */ 406 if ((INDEX_CQL_SERVERCHOICE.equals(index) 407 && INDEX_FCS_RESOURCE.equals(term)) 408 || (INDEX_FCS_RESOURCE.equals(index) 409 && (FCS_RESOURCE_TERM_WILDCARD.equals(term) 410 || FCS_RESOURCE_TERM_ANY.equalsIgnoreCase(term)))) { 411 return RESOURCE_INFOS; 259 private static String matchToString(String[] words, int fromIndex, int toIndex) { 260 final StringBuilder sb = new StringBuilder(); 261 boolean isFirst = true; 262 for (int i = fromIndex; i < toIndex; i++) { 263 if (isFirst) { 264 isFirst = false; 412 265 } else { 413 return null; 414 } 415 } else { 416 throw new SRUException(SRUConstants.SRU_QUERY_FEATURE_UNSUPPORTED, 417 "Scan clause too complex."); 418 } 266 sb.append(' '); 267 } 268 sb.append(words[i]); 269 } 270 return sb.toString(); 419 271 } 420 272 … … 441 293 + relation.getBase() + "\" is not supported."); 442 294 } 443 Vector<Modifier> modifiers = relation.getModifiers();295 List<Modifier> modifiers = relation.getModifiers(); 444 296 if ((modifiers != null) && !modifiers.isEmpty()) { 445 297 Modifier modifier = modifiers.get(0); … … 461 313 } 462 314 throw new SRUException(SRUConstants.SRU_QUERY_FEATURE_UNSUPPORTED, 463 "Server currently supportes term-only query"315 "Server currently only supports term-only queries " 464 
316 + "(CQL conformance level 0)."); 465 317 } 466 467 private boolean parseBoolean(String value) {468 if (value != null) {469 return value.endsWith("1") || Boolean.parseBoolean(value);470 }471 return false;472 }473 318 } -
SRUCQIBridge/src/main/webapp/WEB-INF/web.xml
r2722 r2760 33 33 <init-param> 34 34 <param-name>cqi.defaultCorpusPID</param-name> 35 <param-value> 11858/00-1778-0000-0001-DDAF-D</param-value>35 <param-value>http://hdl.handle.net/11858/00-1778-0000-0001-DDAF-D</param-value> 36 36 </init-param> 37 37 <init-param>
Note: See TracChangeset for help on using the changeset viewer.