- Timestamp:
- 12/05/12 19:41:27 (11 years ago)
- Location:
- SRUClient/trunk/src/main/java/eu/clarin/sru/fcs
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
SRUClient/trunk/src/main/java/eu/clarin/sru/fcs/ClarinFCSRecordParser.java
r2384 r2387 20 20 import java.util.List; 21 21 22 import javax.xml.parsers.DocumentBuilder; 23 import javax.xml.parsers.DocumentBuilderFactory; 24 import javax.xml.parsers.ParserConfigurationException; 22 25 import javax.xml.stream.XMLStreamException; 23 26 import javax.xml.stream.XMLStreamReader; 27 import javax.xml.transform.Transformer; 28 import javax.xml.transform.TransformerConfigurationException; 29 import javax.xml.transform.TransformerException; 30 import javax.xml.transform.TransformerFactory; 31 import javax.xml.transform.dom.DOMResult; 32 import javax.xml.transform.stax.StAXSource; 24 33 25 34 import org.slf4j.Logger; 26 35 import org.slf4j.LoggerFactory; 36 import org.w3c.dom.Document; 37 import org.w3c.dom.DocumentFragment; 38 import org.w3c.dom.NodeList; 27 39 28 40 import eu.clarin.sru.client.SRUClientException; … … 36 48 * parser currently supports the KWIC view. 37 49 */ 38 public class ClarinFCSRecordParser implements 39 SRURecordDataParser { 50 public class ClarinFCSRecordParser implements SRURecordDataParser { 51 private static class TransformHelper { 52 private final DocumentBuilder builder; 53 private final Transformer transformer; 54 private Document document; 55 56 57 private TransformHelper(DocumentBuilder builder, 58 Transformer transformer) { 59 if (builder == null) { 60 throw new NullPointerException("builder == null"); 61 } 62 this.builder = builder; 63 if (transformer == null) { 64 throw new NullPointerException("transformer == null"); 65 } 66 this.transformer = transformer; 67 } 68 69 70 private DocumentFragment transform(XMLStreamReader reader) 71 throws XMLStreamException, TransformerException { 72 if (document == null) { 73 document = builder.newDocument(); 74 } 75 76 // parse STAX to DOM fragment 77 DocumentFragment fragment = document.createDocumentFragment(); 78 DOMResult result = new DOMResult(fragment); 79 transformer.transform(new StAXSource(reader), result); 80 return fragment; 81 } 82 83 84 private void reset() { 85 builder.reset(); 86 transformer.reset(); 87 document = null; 88 } 89 } // private class TransformHelper 40 90 private static final Logger logger = 41 91 LoggerFactory.getLogger(ClarinFCSRecordParser.class); … … 44 94 private static final String FCS_KWIC_NS = "http://clarin.eu/fcs/1.0/kwic"; 45 95 private static final String DATAVIEW_KWIC_LEGACY_TYPE = "kwic"; 96 private final ThreadLocal<TransformHelper> transformHelper; 97 98 99 public ClarinFCSRecordParser() { 100 this(DocumentBuilderFactory.newInstance(), 101 TransformerFactory.newInstance()); 102 } 103 104 105 public ClarinFCSRecordParser(final DocumentBuilderFactory builderFactory, 106 final TransformerFactory transformerFactory) { 107 if (builderFactory == null) { 108 throw new NullPointerException("builderFactory == null"); 109 } 110 if (transformerFactory == null) { 111 throw new NullPointerException("transformerFactory == null"); 112 } 113 this.transformHelper = new ThreadLocal<TransformHelper>() { 114 @Override 115 protected TransformHelper initialValue() { 116 try { 117 return new TransformHelper(builderFactory.newDocumentBuilder(), 118 transformerFactory.newTransformer()); 119 } catch (TransformerConfigurationException e) { 120 throw new InternalError("unexpected error creating new transformer"); 121 } catch (ParserConfigurationException e) { 122 throw new InternalError("unexpected error creating new document builder"); 123 } 124 } 125 }; 126 } 46 127 47 128 … … 56 137 throws XMLStreamException, SRUClientException { 57 138 logger.debug("parsing CLARIN-FCS record"); 58 // Resource 59 XmlStreamReaderUtils.readStart(reader, FCS_NS, "Resource", true, true); 60 String pid = XmlStreamReaderUtils.readAttributeValue(reader, null, "pid"); 61 String ref = XmlStreamReaderUtils.readAttributeValue(reader, null, "ref"); 62 XmlStreamReaderUtils.consumeStart(reader); 63 64 // Resource/Resource (optional) 65 if (XmlStreamReaderUtils.readStart(reader, FCS_NS, "Resource", false)) { 66 logger.info("skipping nested <Resource> element"); 139 140 final TransformHelper helper = transformHelper.get(); 141 try { 142 // Resource 143 XmlStreamReaderUtils.readStart(reader, FCS_NS, "Resource", true, true); 144 String pid = XmlStreamReaderUtils.readAttributeValue(reader, null, "pid"); 145 String ref = XmlStreamReaderUtils.readAttributeValue(reader, null, "ref"); 146 XmlStreamReaderUtils.consumeStart(reader); 147 148 // Resource/Resource (optional) 149 if (XmlStreamReaderUtils.readStart(reader, FCS_NS, "Resource", false)) { 150 logger.info("skipping nested <Resource> element"); 151 XmlStreamReaderUtils.readEnd(reader, FCS_NS, "Resource", true); 152 } 153 154 // Resource/DataView 155 final List<DataView> dataviews = parseDataViews(reader, helper); 156 157 // Resource/ResourceFragment 158 final List<Resource.ResourceFragment> resourceFragments = 159 parseResourceFragments(reader, helper); 160 67 161 XmlStreamReaderUtils.readEnd(reader, FCS_NS, "Resource", true); 68 } 69 70 // Resource/DataView 71 final List<DataView> dataviews = parseDataViews(reader); 72 73 // Resource/ResourceFragment 74 final List<Resource.ResourceFragment> resourceFragments = 75 parseResourceFragments(reader); 76 77 XmlStreamReaderUtils.readEnd(reader, FCS_NS, "Resource", true); 78 79 return new ClarinFCSRecordData(pid, ref, dataviews, 80 resourceFragments); 81 } 82 83 84 private static List<DataView> parseDataViews(XMLStreamReader reader) 85 throws XMLStreamException, SRUClientException { 162 163 return new ClarinFCSRecordData(pid, ref, dataviews, 164 resourceFragments); 165 } finally { 166 // make sure, we reset the helper 167 helper.reset(); 168 } 169 } 170 171 172 private static List<DataView> parseDataViews(XMLStreamReader reader, 173 TransformHelper foo) throws XMLStreamException, SRUClientException { 86 174 List<DataView> dataviews = null; 87 175 … … 94 182 "'mime-type'; trying attribute 'type' instead"); 95 183 type = XmlStreamReaderUtils.readAttributeValue(reader, null, "type"); 96 } 184 if (type != null) { 185 logger.warn("attribute 'type' is deprecated for element " + 186 "<DataView>; please use 'mime-type' attribute"); 187 } 188 } 189 if ((type == null) || type.isEmpty()) { 190 throw new SRUClientException("element <DataView> needs a " 191 + "non-empty 'mime-type' (or 'type') attribute"); 192 } 193 194 // consume start element and get rid of any whitespace 97 195 XmlStreamReaderUtils.consumeStart(reader); 98 if ((type == null) || type.isEmpty()) { 99 throw new SRUClientException("element <DataView> need as " 100 + "non-empty 'mime-type' (or 'type') attribute"); 101 } 196 XmlStreamReaderUtils.consumeWhitespace(reader); 197 102 198 logger.debug("found DataView of type = {}", type); 103 199 DataView dataview = null; 104 200 if (KWICDataView.MIMETYPE.equals(type) || 105 201 DATAVIEW_KWIC_LEGACY_TYPE.equals(type)) { 202 logger.debug("parsing dataview using FCS-KWIC parser"); 106 203 dataview = parseDataViewKWIC(reader, pid, ref); 107 } 204 } else { 205 logger.debug("parsing dataview using generic parser"); 206 dataview = parseDataViewGeneric(reader, foo, type, pid, ref); 207 } 208 108 209 XmlStreamReaderUtils.readEnd(reader, FCS_NS, "DataView", true); 109 210 … … 122 223 123 224 private static List<Resource.ResourceFragment> parseResourceFragments( 124 XMLStreamReader reader ) throws XMLStreamException,125 SRUClientException {225 XMLStreamReader reader, TransformHelper foo) 226 throws XMLStreamException, SRUClientException { 126 227 List<Resource.ResourceFragment> resourceFragments = null; 127 228 while (XmlStreamReaderUtils.readStart(reader, FCS_NS, "ResourceFragment", false, true)) { … … 130 231 String ref = XmlStreamReaderUtils.readAttributeValue(reader, null, "ref"); 131 232 XmlStreamReaderUtils.consumeStart(reader); 132 List<DataView> dataviews = parseDataViews(reader);233 final List<DataView> dataviews = parseDataViews(reader, foo); 133 234 XmlStreamReaderUtils.readEnd(reader, FCS_NS, "ResourceFragment", true); 134 235 … … 139 240 } // while 140 241 return resourceFragments; 242 } 243 244 245 private static DataView parseDataViewGeneric(XMLStreamReader reader, 246 TransformHelper helper, String type, String pid, String ref) 247 throws XMLStreamException, SRUClientException { 248 try { 249 final DocumentFragment fragment = helper.transform(reader); 250 final NodeList children = fragment.getChildNodes(); 251 if ((children != null) && (children.getLength() > 0)) { 252 return new GenericDataView(type, pid, ref, fragment); 253 } else { 254 throw new SRUClientException("element <DataView> does not " + 255 "contain any nested elements"); 256 } 257 } catch (TransformerException e) { 258 throw new SRUClientException("error while parsing dataview", e); 259 } 141 260 } 142 261 … … 154 273 XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "c"); 155 274 } 156 keyword = XmlStreamReaderUtils.readContent(reader, FCS_KWIC_NS, "kw", 157 true); 275 keyword = XmlStreamReaderUtils.readContent(reader, FCS_KWIC_NS, "kw", true); 158 276 if (XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "c", false)) { 159 277 right = XmlStreamReaderUtils.readString(reader, false);
Note: See TracChangeset
for help on using the changeset viewer.