Changeset 5394 for CMDIValidator
- Timestamp:
- 06/26/14 22:58:39 (10 years ago)
- Location:
- CMDIValidator/trunk/cmdi-validator-core/src/main
- Files:
-
- 2 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
CMDIValidator/trunk/cmdi-validator-core/src/main/java/eu/clarin/cmdi/validator/CMDIValidator.java
r5390 r5394 33 33 34 34 import javax.xml.XMLConstants; 35 import javax.xml.transform.dom.DOMSource; 35 import javax.xml.transform.ErrorListener; 36 import javax.xml.transform.TransformerException; 37 import javax.xml.transform.sax.SAXSource; 36 38 import javax.xml.transform.stream.StreamSource; 37 39 38 40 import net.java.truevfs.access.TFile; 39 41 import net.java.truevfs.access.TFileInputStream; 40 import net.sf.saxon. s9api.Axis;42 import net.sf.saxon.Configuration; 41 43 import net.sf.saxon.s9api.DocumentBuilder; 42 44 import net.sf.saxon.s9api.Processor; … … 45 47 import net.sf.saxon.s9api.WhitespaceStrippingPolicy; 46 48 import net.sf.saxon.s9api.XPathCompiler; 47 import net.sf.saxon.s9api.XPathExecutable;48 49 import net.sf.saxon.s9api.XPathSelector; 50 import net.sf.saxon.s9api.XQueryCompiler; 51 import net.sf.saxon.s9api.XQueryEvaluator; 52 import net.sf.saxon.s9api.XQueryExecutable; 49 53 import net.sf.saxon.s9api.XdmDestination; 50 54 import net.sf.saxon.s9api.XdmItem; 51 55 import net.sf.saxon.s9api.XdmNode; 52 import net.sf.saxon.s9api.XdmSequenceIterator;53 56 import net.sf.saxon.s9api.XsltCompiler; 54 57 import net.sf.saxon.s9api.XsltExecutable; … … 58 61 import org.apache.xerces.impl.xs.XMLSchemaLoader; 59 62 import org.apache.xerces.impl.xs.XSDDescription; 60 import org.apache.xerces.parsers. DOMParser;63 import org.apache.xerces.parsers.SAXParser; 61 64 import org.apache.xerces.parsers.XML11Configuration; 62 65 import org.apache.xerces.util.SymbolTable; … … 72 75 import org.slf4j.Logger; 73 76 import org.slf4j.LoggerFactory; 74 import org. w3c.dom.Document;77 import org.xml.sax.ErrorHandler; 75 78 import org.xml.sax.InputSource; 76 79 import org.xml.sax.SAXException; 80 import org.xml.sax.SAXParseException; 77 81 78 82 import eu.clarin.cmdi.validator.CMDIValidatorResult.Severity; 79 83 import eu.clarin.cmdi.validator.utils.LRUCache; 84 import eu.clarin.cmdi.validator.utils.SaxonLocationUtils; 80 85 81 86 … … 92 97 private static final String SCHEMATATRON_STAGE_3 = 93 98 "/schematron/iso_svrl_for_xslt2.xsl"; 99 private static final String ANALYZE_SVRL = 100 "/analyze-svrl.xq"; 94 101 private static final String DEFAULT_SCHEMATRON_SCHEMA = 95 102 "/default.sch"; … … 111 118 "http://apache.org/xml/features/honour-all-schemaLocations"; 112 119 private static final int INITAL_SYMBOL_TABLE_SIZE = 16141; 113 private static final String SVRL_NAMESPACE_URI = 114 "http://purl.oclc.org/dsdl/svrl"; 115 private static final String SVRL_NAMESPACE_PREFIX = 116 "svrl"; 117 private static final QName SVRL_TEXT = 118 new QName(SVRL_NAMESPACE_URI, "text"); 119 private static final QName SVRL_FAILED_ASSERT = 120 new QName(SVRL_NAMESPACE_URI, "failed-assert"); 120 private static final QName SVRL_S = new QName("s"); 121 private static final QName SVRL_L = new QName("l"); 121 122 private final Processor processor; 122 123 private final CMDISchemaLoader schemaLoader; 123 124 private final XsltExecutable schematronValidatorExecutable; 124 private final XPathExecutable xpath1; 125 private final XPathExecutable xpath2; 125 private final XQueryExecutable analyzeSchematronReport; 126 126 private final List<CMDIValidatorExtension> extensions; 127 127 private final FileEnumerator files; … … 156 156 logger.debug("initializing Saxon ..."); 157 157 this.processor = new Processor(true); 158 final Configuration saxonConfig = 159 this.processor.getUnderlyingConfiguration(); 160 saxonConfig.setErrorListener(new ErrorListener() { 161 @Override 162 public void warning(TransformerException exception) 163 throws TransformerException { 164 throw exception; 165 } 166 167 168 @Override 169 public void fatalError(TransformerException exception) 170 throws TransformerException { 171 throw exception; 172 } 173 174 175 @Override 176 public void error(TransformerException exception) 177 throws TransformerException { 178 throw exception; 179 } 180 }); 181 158 182 159 183 /* … … 163 187 this.schematronValidatorExecutable = 164 188 initSchematronValidator(config, processor); 189 InputStream stream = null; 165 190 try { 166 final XPathCompiler compiler = processor.newXPathCompiler(); 167 compiler.declareNamespace(SVRL_NAMESPACE_PREFIX, SVRL_NAMESPACE_URI); 168 this.xpath1 = compiler.compile("//(svrl:failed-assert|svrl:successful-report)"); 169 this.xpath2 = compiler.compile("preceding-sibling::svrl:fired-rule/@role"); 191 stream = getClass().getResourceAsStream(ANALYZE_SVRL); 192 final XQueryCompiler compiler = processor.newXQueryCompiler(); 193 this.analyzeSchematronReport = compiler.compile(stream); 194 } catch (IOException e) { 195 throw new CMDIValidatorInitException( 196 "error initializing schematron validator", e); 170 197 } catch (SaxonApiException e) { 171 198 throw new CMDIValidatorInitException( 172 199 "error initializing schematron validator", e); 200 } finally { 201 if (stream != null) { 202 try { 203 stream.close(); 204 } catch (IOException e) { 205 /* IGNORE */ 206 } 207 } 173 208 } 174 209 logger.debug("Schematron validator successfully initialized"); 175 210 } else { 176 211 this.schematronValidatorExecutable = null; 177 this.xpath1 = null; 178 this.xpath2 = null; 212 this.analyzeSchematronReport = null; 179 213 } 180 214 … … 424 458 425 459 private final class ThreadContext { 426 private final XML11Configuration xercesConfig;460 private final SAXParser parser; 427 461 private final XsltTransformer schematronValidator; 428 462 private final DocumentBuilder builder; … … 434 468 * initialize Xerces 435 469 */ 436 finalXMLEntityResolver resolver = new XMLEntityResolver() {470 XMLEntityResolver resolver = new XMLEntityResolver() { 437 471 @Override 438 472 public XMLInputSource resolveEntity( … … 505 539 } 506 540 507 xercesConfig = new XML11Configuration(symbols, pool); 541 XML11Configuration xercesConfig = 542 new XML11Configuration(symbols, pool); 508 543 xercesConfig.setFeature(NAMESPACES_FEATURE_ID, true); 509 544 xercesConfig.setFeature(VALIDATION_FEATURE_ID, true); … … 512 547 xercesConfig.setFeature(HONOUR_ALL_SCHEMA_LOCATIONS_ID, true); 513 548 xercesConfig.setEntityResolver(resolver); 514 xercesConfig.setErrorHandler(new XMLErrorHandler() { 549 550 /* 551 * create a reusable parser and also add an error handler. 552 * We cannot use a global error handler in xerces config, because 553 * Saxon ignores and overwrites it ... 554 */ 555 this.parser = new SAXParser(xercesConfig); 556 this.parser.setErrorHandler(new ErrorHandler() { 515 557 @Override 516 public void warning(String domain, String key, 517 XMLParseException e) throws XNIException { 558 public void warning(SAXParseException e) throws SAXException { 518 559 reportWarning(e.getLineNumber(), 519 560 e.getColumnNumber(), 520 e.getMessage(), e);521 }522 561 e.getMessage(), 562 e); 563 } 523 564 524 565 @Override 525 public void error(String domain, String key, 526 XMLParseException e) throws XNIException { 566 public void error(SAXParseException e) throws SAXException { 527 567 reportError(e.getLineNumber(), 528 568 e.getColumnNumber(), 529 e.getMessage(), e); 569 e.getMessage(), 570 e); 530 571 throw e; 531 572 } 532 573 533 534 574 @Override 535 public void fatalError(String domain, String key, 536 XMLParseException e) throws XNIException { 575 public void fatalError(SAXParseException e) throws SAXException { 537 576 reportError(e.getLineNumber(), 538 577 e.getColumnNumber(), 539 e.getMessage(), e); 578 e.getMessage(), 579 e); 540 580 throw e; 541 581 } … … 543 583 544 584 /* 545 * initialize Saxon document builder585 * initialize and configure Saxon document builder 546 586 */ 547 587 this.builder = processor.newDocumentBuilder(); 548 588 this.builder.setWhitespaceStrippingPolicy( 549 589 WhitespaceStrippingPolicy.IGNORABLE); 550 590 this.builder.setLineNumbering(true); 551 591 /* 552 * initialize schematron 592 * even though, we need to perform Schema validation, tell 593 * Saxon to enable DTD validation. Otherwise, it will 594 * not validate at all ... :/ 595 */ 596 this.builder.setDTDValidation(true); 597 598 /* 599 * initialize Schematron validator 553 600 */ 554 601 if (schematronValidatorExecutable != null) { … … 623 670 throws CMDIValidatorException { 624 671 try { 625 final DOMParser parser = new DOMParser(xercesConfig);626 672 try { 627 parser.parse(new InputSource(stream)); 628 stream.close(); 629 630 final Document dom = parser.getDocument(); 631 if (dom == null) { 673 final SAXSource source = 674 new SAXSource(parser, new InputSource(stream)); 675 return builder.build(source); 676 } finally { 677 /* recycle parser */ 678 try { 679 parser.reset(); 680 } catch (XNIException e) { 632 681 throw new CMDIValidatorException( 633 "parser returned no return result"); 634 } 635 return builder.build(new DOMSource(dom)); 636 } finally { 637 parser.reset(); 638 } 639 } catch (SAXException e) { 640 logger.trace("error parsing instance", e); 641 return null; 682 "error resetting parser", e); 683 } finally { 684 /* really make sure, stream is closed */ 685 stream.close(); 686 } 687 } 642 688 } catch (SaxonApiException e) { 643 689 logger.trace("error parsing instance", e); … … 648 694 : "input/output error"; 649 695 throw new CMDIValidatorException(message, e); 650 } finally {651 /* really make sure, stream is closed */652 try {653 stream.close();654 } catch (IOException e) {655 /* IGNORE */656 }657 696 } 658 697 } … … 668 707 schematronValidator.transform(); 669 708 670 final XPathSelector selector = xpath1.load(); 671 selector.setContextItem(destination.getXdmNode()); 672 for (XdmItem item : selector) { 673 final XdmNode node = (XdmNode) item; 674 final XdmNode text = getFirstChild(node, SVRL_TEXT); 675 String msg = (text != null) 676 ? text.getStringValue().trim() 677 : null; 678 if (SVRL_FAILED_ASSERT.equals(node.getNodeName())) { 679 final XPathSelector selector2 = xpath2.load(); 680 String role = null; 681 selector2.setContextItem(node); 682 XdmItem evaluateSingle = selector2.evaluateSingle(); 683 if (evaluateSingle != null) { 684 role = evaluateSingle.getStringValue().trim(); 709 final XdmNode report = destination.getXdmNode(); 710 if (report != null) { 711 XPathCompiler xpathCompiler = null; 712 final XQueryEvaluator evaluator = 713 analyzeSchematronReport.load(); 714 evaluator.setContextItem(report); 715 for (final XdmItem item : evaluator) { 716 /* lazy initialize XPath compiler */ 717 if (xpathCompiler == null) { 718 xpathCompiler = processor.newXPathCompiler(); 719 xpathCompiler.setCaching(true); 685 720 } 686 if ("warning".equalsIgnoreCase(role)) { 687 result.reportWarning(-1, -1, msg); 721 final XdmNode node = (XdmNode) item; 722 final String s = 723 nullSafeTrim(node.getAttributeValue(SVRL_S)); 724 final String l = 725 nullSafeTrim(node.getAttributeValue(SVRL_L)); 726 final String m = 727 nullSafeTrim(node.getStringValue()); 728 int line = -1; 729 int column = -1; 730 if (l != null) { 731 XPathSelector xs = xpathCompiler.compile(l).load(); 732 xs.setContextItem(document); 733 XdmItem n = xs.evaluateSingle(); 734 line = SaxonLocationUtils.getLineNumber(n); 735 column = SaxonLocationUtils.getColumnNumber(n); 736 } 737 if ("I".equals(s)) { 738 result.reportInfo(line, column, m); 739 } else if ("W".equals(s)) { 740 result.reportWarning(line, column, m); 688 741 } else { 689 result.reportError( -1, -1, msg);742 result.reportError(line, column, m); 690 743 } 691 } else { 692 result.reportInfo(-1, -1, msg); 744 } // for 745 if (xpathCompiler != null) { 746 xpathCompiler.setCaching(false); 747 xpathCompiler = null; 693 748 } 694 749 } … … 700 755 701 756 702 private XdmNode getFirstChild(XdmNode parent, QName name) { 703 XdmSequenceIterator i = parent.axisIterator(Axis.CHILD, name); 704 if (i.hasNext()) { 705 return (XdmNode) i.next(); 706 } else { 707 return null; 708 } 757 private String nullSafeTrim(String s) { 758 if (s != null) { 759 s = s.trim(); 760 if (s.isEmpty()) { 761 s = null; 762 } 763 } 764 return s; 709 765 } 710 766 -
CMDIValidator/trunk/cmdi-validator-core/src/main/java/eu/clarin/cmdi/validator/extensions/CheckHandlesExtension.java
r5387 r5394 22 22 23 23 import org.apache.http.HttpStatus; 24 import org.slf4j.Logger;25 import org.slf4j.LoggerFactory;26 27 24 import net.sf.saxon.s9api.SaxonApiException; 28 25 import net.sf.saxon.s9api.XPathCompiler; … … 36 33 import eu.clarin.cmdi.validator.CMDIValidatorWriteableResult; 37 34 import eu.clarin.cmdi.validator.utils.HandleResolver; 35 import eu.clarin.cmdi.validator.utils.SaxonLocationUtils; 38 36 39 37 public class CheckHandlesExtension extends CMDIValidatorExtension { 40 private static final String XPATH = "//*:ResourceProxy[*:ResourceType/text() = 'Resource' or *:ResourceType/text() = 'Metadata']/*:ResourceRef /text()";38 private static final String XPATH = "//*:ResourceProxy[*:ResourceType/text() = 'Resource' or *:ResourceType/text() = 'Metadata']/*:ResourceRef"; 41 39 private static final String HDL_SCHEME = "hdl"; 42 40 private static final String HDL_PROXY_HTTP = "http"; … … 44 42 private static final String HDL_PROXY_HOST = "hdl.handle.net"; 45 43 private static final String URN_SCHEME = "urn"; 46 @SuppressWarnings("unused")47 private static final Logger logger =48 LoggerFactory.getLogger(CheckHandlesExtension.class);49 44 private final boolean resolveHandles; 50 45 private HandleResolver resolver = null; … … 91 86 for (XdmItem item : selector) { 92 87 String handle = null; 88 final int line = SaxonLocationUtils.getLineNumber(item); 89 final int column = SaxonLocationUtils.getColumnNumber(item); 93 90 final String h = item.getStringValue(); 94 91 if (h != null) { … … 98 95 } else { 99 96 if (!handle.equals(h)) { 100 result.reportWarning( -1, -1, "handle '" + h +97 result.reportWarning(line, column, "handle '" + h + 101 98 "' contains leading or tailing spaces " + 102 99 "within <ResourceRef> element"); … … 106 103 107 104 if (handle != null) { 108 checkHandleURISyntax(handle, result );109 } else { 110 result.reportError( -1, -1,105 checkHandleURISyntax(handle, result, line, column); 106 } else { 107 result.reportError(line, column, 111 108 "invalid handle (<ResourceRef> was empty)"); 112 109 } … … 119 116 120 117 private void checkHandleURISyntax(String handle, 121 CMDIValidatorWriteableResult result) throws CMDIValidatorException { 118 CMDIValidatorWriteableResult result, int line, int column) 119 throws CMDIValidatorException { 122 120 try { 123 121 final URI uri = new URI(handle); … … 130 128 final URI actionableURI = 131 129 new URI(HDL_PROXY_HTTP, HDL_PROXY_HOST, path, null); 132 checkHandleResolves(actionableURI, result );130 checkHandleResolves(actionableURI, result, line, column); 133 131 } catch (URISyntaxException e) { 134 132 /* should not happen */ … … 138 136 } else if (URN_SCHEME.equals(uri.getScheme())) { 139 137 if (resolveHandles) { 140 result.reportInfo( -1, -1, "PID '" + handle +138 result.reportInfo(line, column, "PID '" + handle + 141 139 "' skipped, because URN resolving is not supported"); 142 140 } else { 143 result.reportInfo( -1, -1, "PID '" + handle +141 result.reportInfo(line, column, "PID '" + handle + 144 142 "' skipped, because URN sytax checking is not supported"); 145 143 } … … 148 146 if (uri.getHost() != null) { 149 147 if (!HDL_PROXY_HOST.equalsIgnoreCase(uri.getHost())) { 150 result.reportError( -1, -1,148 result.reportError(line, column, 151 149 "The URI of PID '" + handle + 152 150 "' contains an unexpected host part of '" + 153 151 uri.getHost() + "'"); 154 152 } 155 checkHandleResolves(uri, result );156 } else { 157 result.reportError( -1, -1, "The URI of PID '" + handle+158 "' is missing the host part");153 checkHandleResolves(uri, result, line, column); 154 } else { 155 result.reportError(line, column, "The URI of PID '" + 156 handle + "' is missing the host part"); 159 157 } 160 158 } else { 161 159 if (uri.getScheme() != null) { 162 result.reportError( -1, -1,160 result.reportError(line, column, 163 161 "The URI of PID '" + handle + 164 162 "' contains an unexpected schema part of '" + 165 163 uri.getScheme() + "'"); 166 164 } else { 167 result.reportError( -1, -1, "The URI of PID '" + handle+168 "' is missing a proper schema part");165 result.reportError(line, column, "The URI of PID '" + 166 handle + "' is missing a proper schema part"); 169 167 } 170 168 } 171 169 } catch (URISyntaxException e) { 172 result.reportError( -1, -1, "PID '" + handle +170 result.reportError(line, column, "PID '" + handle + 173 171 "' is not a well-formed URI: " + e.getMessage()); 174 172 } … … 177 175 178 176 private void checkHandleResolves(URI uri, 179 CMDIValidatorWriteableResult result) throws CMDIValidatorException { 177 CMDIValidatorWriteableResult result, int line, int column) 178 throws CMDIValidatorException { 180 179 if (resolver != null) { 181 180 try { … … 188 187 /* FALL-THROUGH */ 189 188 case HttpStatus.SC_FORBIDDEN: 190 result.reportInfo( -1, -1, "PID '" + uri +189 result.reportInfo(line, column, "PID '" + uri + 191 190 "' resolved to an access protected resource (" + 192 191 code + ")"); 193 192 break; 194 193 case HttpStatus.SC_NOT_FOUND: 195 result.reportError( -1, -1, "PID '" + uri +196 "' resolved to an non-existing resource (" + code +197 ")");194 result.reportError(line, column, "PID '" + uri + 195 "' resolved to an non-existing resource (" + 196 code + ")"); 198 197 break; 199 198 case HandleResolver.TIMEOUT: 200 result.reportWarning( -1, -1, "Timeout while resolving PID '" +201 uri + "'");199 result.reportWarning(line, column, 200 "Timeout while resolving PID '" + uri + "'"); 202 201 break; 203 202 case HandleResolver.UNKNOWN_HOST: 204 result.reportWarning( -1, -1, "Unable to resolve host '" +205 uri.getHost() + "' while resolving PID '"+206 uri + "'");203 result.reportWarning(line, column, 204 "Unable to resolve host '" + uri.getHost() + 205 "' while resolving PID '" + uri + "'"); 207 206 break; 208 207 case HandleResolver.ERROR: 209 result.reportWarning( -1, -1,208 result.reportWarning(line, column, 210 209 "An error occurred while resolving PID '" + 211 210 uri + "'"); 212 211 break; 213 212 default: 214 result.reportWarning(- 1, -1, "PID '" + uri +215 "' resolved with an unexpected result (" + code +216 ")");213 result.reportWarning(-line, column, "PID '" + uri + 214 "' resolved with an unexpected result (" + 215 code + ")"); 217 216 break; 218 217 } // switch
Note: See TracChangeset
for help on using the changeset viewer.