Changeset 2394
- Timestamp: 12/07/12 11:51:39 (11 years ago)
- Location: SRUClient/trunk/src
- Files: 3 added, 5 edited, 2 moved
Legend: Unmodified, Added, Removed
SRUClient/trunk/src/main/java/eu/clarin/sru/fcs/ClarinFCSRecordParser.java
r2387 r2394 1 1 /** 2 * This software is copyright (c) 2011 by2 * This software is copyright (c) 2011-2012 by 3 3 * - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 4 4 * This is free software. You can redistribute it … … 20 20 import java.util.List; 21 21 22 import javax.xml.parsers.DocumentBuilder;23 import javax.xml.parsers.DocumentBuilderFactory;24 import javax.xml.parsers.ParserConfigurationException;25 22 import javax.xml.stream.XMLStreamException; 26 23 import javax.xml.stream.XMLStreamReader; 27 import javax.xml.transform.Transformer;28 import javax.xml.transform.TransformerConfigurationException;29 import javax.xml.transform.TransformerException;30 import javax.xml.transform.TransformerFactory;31 import javax.xml.transform.dom.DOMResult;32 import javax.xml.transform.stax.StAXSource;33 34 24 import org.slf4j.Logger; 35 25 import org.slf4j.LoggerFactory; 36 import org.w3c.dom.Document;37 import org.w3c.dom.DocumentFragment;38 import org.w3c.dom.NodeList;39 40 26 import eu.clarin.sru.client.SRUClientException; 41 27 import eu.clarin.sru.client.SRURecordData; … … 45 31 46 32 /** 47 * A record parse to parse records conforming to CLARIN FCS specification. The 48 * parser currently supports the KWIC view. 33 * A record parse to parse records conforming to CLARIN FCS specification. 
49 34 */ 50 35 public class ClarinFCSRecordParser implements SRURecordDataParser { 51 private static class TransformHelper {52 private final DocumentBuilder builder;53 private final Transformer transformer;54 private Document document;55 56 57 private TransformHelper(DocumentBuilder builder,58 Transformer transformer) {59 if (builder == null) {60 throw new NullPointerException("builder == null");61 }62 this.builder = builder;63 if (transformer == null) {64 throw new NullPointerException("transformer == null");65 }66 this.transformer = transformer;67 }68 69 70 private DocumentFragment transform(XMLStreamReader reader)71 throws XMLStreamException, TransformerException {72 if (document == null) {73 document = builder.newDocument();74 }75 76 // parse STAX to DOM fragment77 DocumentFragment fragment = document.createDocumentFragment();78 DOMResult result = new DOMResult(fragment);79 transformer.transform(new StAXSource(reader), result);80 return fragment;81 }82 83 84 private void reset() {85 builder.reset();86 transformer.reset();87 document = null;88 }89 } // private class TransformHelper90 36 private static final Logger logger = 91 37 LoggerFactory.getLogger(ClarinFCSRecordParser.class); 92 38 private static final String FCS_NS = 93 39 ClarinFCSRecordData.RECORD_SCHEMA; 94 private static final String FCS_KWIC_NS = "http://clarin.eu/fcs/1.0/kwic"; 95 private static final String DATAVIEW_KWIC_LEGACY_TYPE = "kwic"; 96 private final ThreadLocal<TransformHelper> transformHelper; 97 98 99 public ClarinFCSRecordParser() { 100 this(DocumentBuilderFactory.newInstance(), 101 TransformerFactory.newInstance()); 102 } 103 104 105 public ClarinFCSRecordParser(final DocumentBuilderFactory builderFactory, 106 final TransformerFactory transformerFactory) { 107 if (builderFactory == null) { 108 throw new NullPointerException("builderFactory == null"); 109 } 110 if (transformerFactory == null) { 111 throw new NullPointerException("transformerFactory == null"); 112 } 113 
this.transformHelper = new ThreadLocal<TransformHelper>() { 114 @Override 115 protected TransformHelper initialValue() { 116 try { 117 return new TransformHelper(builderFactory.newDocumentBuilder(), 118 transformerFactory.newTransformer()); 119 } catch (TransformerConfigurationException e) { 120 throw new InternalError("unexpected error creating new transformer"); 121 } catch (ParserConfigurationException e) { 122 throw new InternalError("unexpected error creating new document builder"); 123 } 124 } 125 }; 126 } 40 // TODO: make this configurable 41 private final DataViewParser[] parsers = new DataViewParser[] { 42 new DataViewParserGenericDOM(), 43 new DataViewParserKWIC() 44 }; 127 45 128 46 … … 138 56 logger.debug("parsing CLARIN-FCS record"); 139 57 140 final TransformHelper helper = transformHelper.get(); 141 try { 142 // Resource 143 XmlStreamReaderUtils.readStart(reader, FCS_NS, "Resource", true, true); 144 String pid = XmlStreamReaderUtils.readAttributeValue(reader, null, "pid"); 145 String ref = XmlStreamReaderUtils.readAttributeValue(reader, null, "ref"); 146 XmlStreamReaderUtils.consumeStart(reader); 58 // Resource 59 XmlStreamReaderUtils.readStart(reader, FCS_NS, "Resource", true, true); 60 String pid = XmlStreamReaderUtils.readAttributeValue(reader, null, "pid"); 61 String ref = XmlStreamReaderUtils.readAttributeValue(reader, null, "ref"); 62 XmlStreamReaderUtils.consumeStart(reader); 147 63 148 149 150 151 152 64 // Resource/Resource (optional) 65 if (XmlStreamReaderUtils.readStart(reader, FCS_NS, "Resource", false)) { 66 logger.info("skipping nested <Resource> element"); 67 XmlStreamReaderUtils.readEnd(reader, FCS_NS, "Resource", true); 68 } 153 69 154 155 final List<DataView> dataviews = parseDataViews(reader, helper);70 // Resource/DataView 71 final List<DataView> dataviews = parseDataViews(reader); 156 72 157 158 159 parseResourceFragments(reader, helper);73 // Resource/ResourceFragment 74 final List<Resource.ResourceFragment> resourceFragments = 
75 parseResourceFragments(reader); 160 76 161 77 XmlStreamReaderUtils.readEnd(reader, FCS_NS, "Resource", true); 162 78 163 return new ClarinFCSRecordData(pid, ref, dataviews, 164 resourceFragments); 165 } finally { 166 // make sure, we reset the helper 167 helper.reset(); 168 } 79 return new ClarinFCSRecordData(pid, ref, dataviews, resourceFragments); 169 80 } 170 81 171 82 172 private static List<DataView> parseDataViews(XMLStreamReader reader,173 TransformHelper foo)throws XMLStreamException, SRUClientException {83 private List<DataView> parseDataViews(XMLStreamReader reader) 84 throws XMLStreamException, SRUClientException { 174 85 List<DataView> dataviews = null; 175 86 … … 196 107 XmlStreamReaderUtils.consumeWhitespace(reader); 197 108 198 logger.debug("found DataView of type = {}", type); 109 logger.debug("processing <DataView> of type = {}", type); 110 111 DataViewParser parser = null; 112 for (int i = 0; i < parsers.length; i++) { 113 if (parsers[i].acceptType(type) && 114 ((parser == null) || 115 (parser.getPriority() < parsers[i].getPriority()))) { 116 parser = parsers[i]; 117 } 118 } 119 199 120 DataView dataview = null; 200 if (KWICDataView.MIMETYPE.equals(type) || 201 DATAVIEW_KWIC_LEGACY_TYPE.equals(type)) { 202 logger.debug("parsing dataview using FCS-KWIC parser"); 203 dataview = parseDataViewKWIC(reader, pid, ref); 121 if (parser != null) { 122 dataview = parser.parse(reader, type, pid, ref); 204 123 } else { 205 logger.debug("parsing dataview using generic parser"); 206 dataview = parseDataViewGeneric(reader, foo, type, pid, ref); 124 logger.warn("no parser found for <DataView> of type = {}", type); 207 125 } 208 126 … … 215 133 dataviews.add(dataview); 216 134 } else { 217 logger. 
info("DataView of type = {} skipped", type);135 logger.warn("skipped <DataView> of type = {}", type); 218 136 } 219 137 } // while … … 222 140 223 141 224 private staticList<Resource.ResourceFragment> parseResourceFragments(225 XMLStreamReader reader , TransformHelper foo)226 throws XMLStreamException,SRUClientException {142 private List<Resource.ResourceFragment> parseResourceFragments( 143 XMLStreamReader reader) throws XMLStreamException, 144 SRUClientException { 227 145 List<Resource.ResourceFragment> resourceFragments = null; 228 146 while (XmlStreamReaderUtils.readStart(reader, FCS_NS, "ResourceFragment", false, true)) { … … 231 149 String ref = XmlStreamReaderUtils.readAttributeValue(reader, null, "ref"); 232 150 XmlStreamReaderUtils.consumeStart(reader); 233 final List<DataView> dataviews = parseDataViews(reader , foo);151 final List<DataView> dataviews = parseDataViews(reader); 234 152 XmlStreamReaderUtils.readEnd(reader, FCS_NS, "ResourceFragment", true); 235 153 … … 242 160 } 243 161 244 245 private static DataView parseDataViewGeneric(XMLStreamReader reader, 246 TransformHelper helper, String type, String pid, String ref) 247 throws XMLStreamException, SRUClientException { 248 try { 249 final DocumentFragment fragment = helper.transform(reader); 250 final NodeList children = fragment.getChildNodes(); 251 if ((children != null) && (children.getLength() > 0)) { 252 return new GenericDataView(type, pid, ref, fragment); 253 } else { 254 throw new SRUClientException("element <DataView> does not " + 255 "contain any nested elements"); 256 } 257 } catch (TransformerException e) { 258 throw new SRUClientException("error while parsing dataview", e); 259 } 260 } 261 262 263 private static DataView parseDataViewKWIC(XMLStreamReader reader, 264 String pid, String ref) throws XMLStreamException, 265 SRUClientException { 266 String left = null; 267 String keyword = null; 268 String right = null; 269 270 XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "kwic", 
true); 271 if (XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "c", false)) { 272 left = XmlStreamReaderUtils.readString(reader, false); 273 XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "c"); 274 } 275 keyword = XmlStreamReaderUtils.readContent(reader, FCS_KWIC_NS, "kw", true); 276 if (XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "c", false)) { 277 right = XmlStreamReaderUtils.readString(reader, false); 278 XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "c"); 279 } 280 XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "kwic"); 281 282 logger.debug("left='{}' keyword='{}', right='{}'", new Object[] { 283 left, keyword, right } 284 ); 285 return new KWICDataView(pid, ref, left, keyword, right); 286 } 287 288 } // class ClarinFederatedContentSearchRecordParser 162 } // class ClarinFCSRecordParser -
SRUClient/trunk/src/main/java/eu/clarin/sru/fcs/DataView.java
r2306 r2394 1 /** 2 * This software is copyright (c) 2011-2012 by 3 * - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 4 * This is free software. You can redistribute it 5 * and/or modify it under the terms described in 6 * the GNU General Public License v3 of which you 7 * should have received a copy. Otherwise you can download 8 * it from 9 * 10 * http://www.gnu.org/licenses/gpl-3.0.txt 11 * 12 * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 13 * 14 * @license http://www.gnu.org/licenses/gpl-3.0.txt 15 * GNU General Public License v3 16 */ 1 17 package eu.clarin.sru.fcs; 2 18 -
SRUClient/trunk/src/main/java/eu/clarin/sru/fcs/DataViewGenericDOM.java
r2388 r2394 1 /** 2 * This software is copyright (c) 2011-2012 by 3 * - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 4 * This is free software. You can redistribute it 5 * and/or modify it under the terms described in 6 * the GNU General Public License v3 of which you 7 * should have received a copy. Otherwise you can download 8 * it from 9 * 10 * http://www.gnu.org/licenses/gpl-3.0.txt 11 * 12 * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 13 * 14 * @license http://www.gnu.org/licenses/gpl-3.0.txt 15 * GNU General Public License v3 16 */ 1 17 package eu.clarin.sru.fcs; 2 18 3 import org.w3c.dom.Document Fragment;19 import org.w3c.dom.Document; 4 20 5 21 6 public class GenericDataViewextends DataView {7 private final Document Fragment fragment;22 public class DataViewGenericDOM extends DataView { 23 private final Document document; 8 24 9 25 10 protected GenericDataView(String mimetype, String pid, String ref,11 Document Fragment fragment) {26 protected DataViewGenericDOM(String mimetype, String pid, String ref, 27 Document document) { 12 28 super(mimetype, pid, ref); 13 this. fragment = fragment;29 this.document = document; 14 30 } 15 31 16 32 17 public Document Fragment getDocumentFragment() {18 return fragment;33 public Document getDocument() { 34 return document; 19 35 } 20 36 -
SRUClient/trunk/src/main/java/eu/clarin/sru/fcs/DataViewKWIC.java
r2388 r2394 1 /** 2 * This software is copyright (c) 2011-2012 by 3 * - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 4 * This is free software. You can redistribute it 5 * and/or modify it under the terms described in 6 * the GNU General Public License v3 of which you 7 * should have received a copy. Otherwise you can download 8 * it from 9 * 10 * http://www.gnu.org/licenses/gpl-3.0.txt 11 * 12 * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 13 * 14 * @license http://www.gnu.org/licenses/gpl-3.0.txt 15 * GNU General Public License v3 16 */ 1 17 package eu.clarin.sru.fcs; 2 18 … … 4 20 * A CLARIN FCS KWIC DataView. 5 21 */ 6 public final class KWICDataViewextends DataView {22 public final class DataViewKWIC extends DataView { 7 23 /** 8 24 * The MIME type for CLARIN FCS KWIC dataviews. … … 16 32 /** 17 33 * Constructor. 18 * 34 * 19 35 * @param pid 20 36 * a persistent identifier or <code>null</code> … … 28 44 * the right KWIC context 29 45 */ 30 KWICDataView(String pid, String ref, String left, String keyword,46 DataViewKWIC(String pid, String ref, String left, String keyword, 31 47 String right) { 32 48 super(MIMETYPE, pid, ref); -
SRUClient/trunk/src/main/java/eu/clarin/sru/fcs/Resource.java
r2304 r2394 1 /** 2 * This software is copyright (c) 2011-2012 by 3 * - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 4 * This is free software. You can redistribute it 5 * and/or modify it under the terms described in 6 * the GNU General Public License v3 of which you 7 * should have received a copy. Otherwise you can download 8 * it from 9 * 10 * http://www.gnu.org/licenses/gpl-3.0.txt 11 * 12 * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 13 * 14 * @license http://www.gnu.org/licenses/gpl-3.0.txt 15 * GNU General Public License v3 16 */ 1 17 package eu.clarin.sru.fcs; 2 18 … … 30 46 /** 31 47 * Get the persistent identifier for this resource fragment. 32 * 48 * 33 49 * @return a persistent identifier or <code>null</code> of this resource 34 50 * fragment has none … … 41 57 /** 42 58 * Get the reference URI for this resource fragment. 43 * 59 * 44 60 * @return a reference URI or <code>null</code> of this resource 45 61 * fragment has none … … 53 69 * Convenience method to check if this resource fragment has any 54 70 * dataviews. 55 * 71 * 56 72 * @return <code>true</code> if this resource fragment has dataviews, 57 73 * <code>false</code> otherwise … … 64 80 /** 65 81 * Get the list of dataview objects for this this resource fragment. 66 * 82 * 67 83 * @return a list of {@link DataView} objects or <code>null</code>, 68 84 * or <code>null</code> if this resource fragment has … … 99 115 /** 100 116 * Get the persistent identifier for this resource. 101 * 117 * 102 118 * @return a persistent identifier or <code>null</code> of this resource has 103 119 * none … … 110 126 /** 111 127 * Get the reference URI for this resource. 112 * 128 * 113 129 * @return a reference URI or <code>null</code> of this resource has 114 130 * none … … 121 137 /** 122 138 * Convenience method to check if this resource has any dataviews. 
123 * 139 * 124 140 * @return <code>true</code> if this resource has dataviews, 125 141 * <code>false</code> otherwise … … 132 148 /** 133 149 * Get the list of dataview objects for this this resource. 134 * 150 * 135 151 * @return a list of {@link DataView} objects or <code>null</code>, or 136 152 * <code>null</code> if this resource has none … … 143 159 /** 144 160 * Convenience method to check if this resource has any resource fragments. 145 * 161 * 146 162 * @return <code>true</code> if this resource has resource fragments, 147 163 * <code>false</code> otherwise … … 154 170 /** 155 171 * Get the list of resource fragment objects for this this resource. 156 * 172 * 157 173 * @return a list of {@link ResourceFragment} objects or <code>null</code>, 158 174 * or <code>null</code> if this resource has none -
SRUClient/trunk/src/test/java/eu/clarin/sru/client/TestSimpleClient.java
r2388 r2394 24 24 import eu.clarin.sru.fcs.ClarinFCSRecordData; 25 25 import eu.clarin.sru.fcs.ClarinFCSRecordParser; 26 import eu.clarin.sru.fcs.DataView;27 import eu.clarin.sru.fcs.KWICDataView;28 import eu.clarin.sru.fcs.Resource;29 26 30 27 … … 112 109 ClarinFCSRecordData record = 113 110 (ClarinFCSRecordData) data; 114 dumpResource(record.getResource());111 TestUtils.dumpResource(record.getResource()); 115 112 } 116 113 } … … 157 154 158 155 159 private static void dumpResource(Resource resource) {160 logger.info("CLARIN-FCS: pid={}, ref={}",161 resource.getPid(), resource.getRef());162 if (resource.hasDataViews()) {163 dumpDataView("CLARIN-FCS: ", resource.getDataViews());164 }165 if (resource.hasResourceFragments()) {166 for (Resource.ResourceFragment fragment : resource.getResourceFragments()) {167 logger.debug("CLARIN-FCS: ResourceFragment: pid={}, ref={}",168 fragment.getPid(), fragment.getRef());169 if (fragment.hasDataViews()) {170 dumpDataView("CLARIN-FCS: ResourceFragment/", fragment.getDataViews());171 }172 }173 }174 }175 176 177 private static void dumpDataView(String s, List<DataView> dataviews) {178 for (DataView dataview : dataviews) {179 logger.info("{}DataView: type={}, pid={}, ref={}",180 new Object[] {181 s,182 dataview.getMimeType(),183 dataview.getPid(),184 dataview.getRef()185 });186 if (dataview.isMimeType(KWICDataView.MIMETYPE)) {187 final KWICDataView kw = (KWICDataView) dataview;188 logger.info("{}DataView: {} / {} / {}",189 new Object[] {190 s,191 kw.getLeft(),192 kw.getKeyword(),193 kw.getRight() });194 }195 }196 }197 198 199 156 static { 200 157 org.apache.log4j.BasicConfigurator.configure( -
SRUClient/trunk/src/test/java/eu/clarin/sru/client/TestUtils.java
r2388 r2394 1 /** 2 * This software is copyright (c) 2011-2012 by 3 * - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 4 * This is free software. You can redistribute it 5 * and/or modify it under the terms described in 6 * the GNU General Public License v3 of which you 7 * should have received a copy. Otherwise you can download 8 * it from 9 * 10 * http://www.gnu.org/licenses/gpl-3.0.txt 11 * 12 * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) 13 * 14 * @license http://www.gnu.org/licenses/gpl-3.0.txt 15 * GNU General Public License v3 16 */ 1 17 package eu.clarin.sru.client; 2 18 … … 5 21 import org.slf4j.Logger; 6 22 import org.slf4j.LoggerFactory; 23 import org.w3c.dom.Node; 7 24 8 25 import eu.clarin.sru.fcs.ClarinFCSRecordData; 9 26 import eu.clarin.sru.fcs.DataView; 10 import eu.clarin.sru.fcs. GenericDataView;11 import eu.clarin.sru.fcs. KWICDataView;27 import eu.clarin.sru.fcs.DataViewGenericDOM; 28 import eu.clarin.sru.fcs.DataViewKWIC; 12 29 import eu.clarin.sru.fcs.Resource; 13 30 … … 126 143 127 144 128 p rivatestatic void dumpResource(Resource resource) {145 public static void dumpResource(Resource resource) { 129 146 logger.info("CLARIN-FCS: pid={}, ref={}", 130 147 resource.getPid(), resource.getRef()); … … 153 170 dataview.getRef() 154 171 }); 155 if (dataview instanceof GenericDataView) { 156 final GenericDataView view = (GenericDataView) dataview; 157 logger.info("{}DataView: DocumentFragment with root element <{}>", 158 s, view.getDocumentFragment().getFirstChild().getNodeName()); 159 } else if (dataview.isMimeType(KWICDataView.MIMETYPE)) { 160 final KWICDataView kw = (KWICDataView) dataview; 172 if (dataview instanceof DataViewGenericDOM) { 173 final DataViewGenericDOM view = (DataViewGenericDOM) dataview; 174 final Node root = view.getDocument().getFirstChild(); 175 logger.info("{}DataView: root element <{}> / {}", 176 new Object[] { 177 s, 178 root.getNodeName(), 179 root.getOwnerDocument().hashCode() }); 
180 } else if (dataview.isMimeType(DataViewKWIC.MIMETYPE)) { 181 final DataViewKWIC kw = (DataViewKWIC) dataview; 161 182 logger.info("{}DataView: {} / {} / {}", 162 183 new Object[] { … … 169 190 } 170 191 171 } 192 } // class TestUtils
Note: See TracChangeset for help on using the changeset viewer.