1 | /** |
---|
2 | * This software is copyright (c) 2014 by |
---|
3 | * - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) |
---|
4 | * This is free software. You can redistribute it |
---|
5 | * and/or modify it under the terms described in |
---|
6 | * the GNU General Public License v3 of which you |
---|
7 | * should have received a copy. Otherwise you can download |
---|
8 | * it from |
---|
9 | * |
---|
10 | * http://www.gnu.org/licenses/gpl-3.0.txt |
---|
11 | * |
---|
12 | * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de) |
---|
13 | * |
---|
14 | * @license http://www.gnu.org/licenses/gpl-3.0.txt |
---|
15 | * GNU General Public License v3 |
---|
16 | */ |
---|
17 | package eu.clarin.cmdi.validator.extensions; |
---|
18 | |
---|
19 | import java.io.IOException; |
---|
20 | import java.net.URI; |
---|
21 | import java.net.URISyntaxException; |
---|
22 | |
---|
23 | import org.apache.http.HttpStatus; |
---|
24 | import net.sf.saxon.s9api.SaxonApiException; |
---|
25 | import net.sf.saxon.s9api.XPathCompiler; |
---|
26 | import net.sf.saxon.s9api.XPathExecutable; |
---|
27 | import net.sf.saxon.s9api.XPathSelector; |
---|
28 | import net.sf.saxon.s9api.XdmItem; |
---|
29 | import net.sf.saxon.s9api.XdmNode; |
---|
30 | import eu.clarin.cmdi.validator.CMDIValidatorException; |
---|
31 | import eu.clarin.cmdi.validator.CMDIValidatorExtension; |
---|
32 | import eu.clarin.cmdi.validator.CMDIValidatorInitException; |
---|
33 | import eu.clarin.cmdi.validator.CMDIValidatorWriteableResult; |
---|
34 | import eu.clarin.cmdi.validator.utils.HandleResolver; |
---|
35 | import eu.clarin.cmdi.validator.utils.SaxonLocationUtils; |
---|
36 | |
---|
37 | public class CheckHandlesExtension extends CMDIValidatorExtension { |
---|
38 | private static final String XPATH = "//*:ResourceProxy[*:ResourceType/text() = 'Resource' or *:ResourceType/text() = 'Metadata']/*:ResourceRef"; |
---|
39 | private static final String HDL_SCHEME = "hdl"; |
---|
40 | private static final String HDL_PROXY_HTTP = "http"; |
---|
41 | private static final String HDL_PROXY_HTTPS = "https"; |
---|
42 | private static final String HDL_PROXY_HOST = "hdl.handle.net"; |
---|
43 | private static final String URN_SCHEME = "urn"; |
---|
44 | private final boolean resolveHandles; |
---|
45 | private HandleResolver resolver = null; |
---|
46 | private XPathExecutable xpath; |
---|
47 | |
---|
48 | |
---|
49 | public CheckHandlesExtension(boolean resolveHandles) { |
---|
50 | this.resolveHandles = resolveHandles; |
---|
51 | } |
---|
52 | |
---|
53 | |
---|
54 | public boolean isResolvingHandles() { |
---|
55 | return resolveHandles; |
---|
56 | } |
---|
57 | |
---|
58 | |
---|
59 | public HandleResolver.Statistics getStatistics() { |
---|
60 | return (resolver != null) ? resolver.getStatistics() : null; |
---|
61 | } |
---|
62 | |
---|
63 | |
---|
64 | @Override |
---|
65 | protected void doInitialize() throws CMDIValidatorInitException { |
---|
66 | if (resolveHandles) { |
---|
67 | this.resolver = new HandleResolver(); |
---|
68 | } |
---|
69 | |
---|
70 | try { |
---|
71 | final XPathCompiler compiler = processor.newXPathCompiler(); |
---|
72 | this.xpath = compiler.compile(XPATH); |
---|
73 | } catch (SaxonApiException e) { |
---|
74 | throw new CMDIValidatorInitException( |
---|
75 | "error initializing check handle extension", e); |
---|
76 | } |
---|
77 | } |
---|
78 | |
---|
79 | |
---|
80 | @Override |
---|
81 | public void validate(XdmNode document, CMDIValidatorWriteableResult result) |
---|
82 | throws CMDIValidatorException { |
---|
83 | try { |
---|
84 | XPathSelector selector = xpath.load(); |
---|
85 | selector.setContextItem(document); |
---|
86 | for (XdmItem item : selector) { |
---|
87 | String handle = null; |
---|
88 | final int line = SaxonLocationUtils.getLineNumber(item); |
---|
89 | final int column = SaxonLocationUtils.getColumnNumber(item); |
---|
90 | final String h = item.getStringValue(); |
---|
91 | if (h != null) { |
---|
92 | handle = h.trim(); |
---|
93 | if (handle.isEmpty()) { |
---|
94 | handle = null; |
---|
95 | } else { |
---|
96 | if (!handle.equals(h)) { |
---|
97 | result.reportWarning(line, column, "handle '" + h + |
---|
98 | "' contains leading or tailing spaces " + |
---|
99 | "within <ResourceRef> element"); |
---|
100 | } |
---|
101 | } |
---|
102 | } |
---|
103 | |
---|
104 | if (handle != null) { |
---|
105 | checkHandleURISyntax(handle, result, line, column); |
---|
106 | } else { |
---|
107 | result.reportError(line, column, |
---|
108 | "invalid handle (<ResourceRef> was empty)"); |
---|
109 | } |
---|
110 | } |
---|
111 | } catch (SaxonApiException e) { |
---|
112 | throw new CMDIValidatorException("failed to check handles", e); |
---|
113 | } |
---|
114 | } |
---|
115 | |
---|
116 | |
---|
117 | private void checkHandleURISyntax(String handle, |
---|
118 | CMDIValidatorWriteableResult result, int line, int column) |
---|
119 | throws CMDIValidatorException { |
---|
120 | try { |
---|
121 | final URI uri = new URI(handle); |
---|
122 | if (HDL_SCHEME.equalsIgnoreCase(uri.getScheme())) { |
---|
123 | String path = uri.getSchemeSpecificPart(); |
---|
124 | if (!path.startsWith("/")) { |
---|
125 | path = "/" + path; |
---|
126 | } |
---|
127 | try { |
---|
128 | final URI actionableURI = |
---|
129 | new URI(HDL_PROXY_HTTP, HDL_PROXY_HOST, path, null); |
---|
130 | checkHandleResolves(actionableURI, result, line, column); |
---|
131 | } catch (URISyntaxException e) { |
---|
132 | /* should not happen */ |
---|
133 | throw new CMDIValidatorException( |
---|
134 | "created an invalid URI", e); |
---|
135 | } |
---|
136 | } else if (URN_SCHEME.equals(uri.getScheme())) { |
---|
137 | if (resolveHandles) { |
---|
138 | result.reportInfo(line, column, "PID '" + handle + |
---|
139 | "' skipped, because URN resolving is not supported"); |
---|
140 | } else { |
---|
141 | result.reportInfo(line, column, "PID '" + handle + |
---|
142 | "' skipped, because URN sytax checking is not supported"); |
---|
143 | } |
---|
144 | } else if (HDL_PROXY_HTTP.equalsIgnoreCase(uri.getScheme()) || |
---|
145 | HDL_PROXY_HTTPS.equalsIgnoreCase(uri.getScheme())) { |
---|
146 | if (uri.getHost() != null) { |
---|
147 | if (!HDL_PROXY_HOST.equalsIgnoreCase(uri.getHost())) { |
---|
148 | result.reportError(line, column, |
---|
149 | "The URI of PID '" + handle + |
---|
150 | "' contains an unexpected host part of '" + |
---|
151 | uri.getHost() + "'"); |
---|
152 | } |
---|
153 | checkHandleResolves(uri, result, line, column); |
---|
154 | } else { |
---|
155 | result.reportError(line, column, "The URI of PID '" + |
---|
156 | handle + "' is missing the host part"); |
---|
157 | } |
---|
158 | } else { |
---|
159 | if (uri.getScheme() != null) { |
---|
160 | result.reportError(line, column, |
---|
161 | "The URI of PID '" + handle + |
---|
162 | "' contains an unexpected schema part of '" + |
---|
163 | uri.getScheme() + "'"); |
---|
164 | } else { |
---|
165 | result.reportError(line, column, "The URI of PID '" + |
---|
166 | handle + "' is missing a proper schema part"); |
---|
167 | } |
---|
168 | } |
---|
169 | } catch (URISyntaxException e) { |
---|
170 | result.reportError(line, column, "PID '" + handle + |
---|
171 | "' is not a well-formed URI: " + e.getMessage()); |
---|
172 | } |
---|
173 | } |
---|
174 | |
---|
175 | |
---|
176 | private void checkHandleResolves(URI uri, |
---|
177 | CMDIValidatorWriteableResult result, int line, int column) |
---|
178 | throws CMDIValidatorException { |
---|
179 | if (resolver != null) { |
---|
180 | try { |
---|
181 | int code = resolver.resolve(uri); |
---|
182 | switch (code) { |
---|
183 | case HttpStatus.SC_OK: |
---|
184 | /* no special message in this case */ |
---|
185 | break; |
---|
186 | case HttpStatus.SC_UNAUTHORIZED: |
---|
187 | /* FALL-THROUGH */ |
---|
188 | case HttpStatus.SC_FORBIDDEN: |
---|
189 | result.reportInfo(line, column, "PID '" + uri + |
---|
190 | "' resolved to an access protected resource (" + |
---|
191 | code + ")"); |
---|
192 | break; |
---|
193 | case HttpStatus.SC_NOT_FOUND: |
---|
194 | result.reportError(line, column, "PID '" + uri + |
---|
195 | "' resolved to an non-existing resource (" + |
---|
196 | code + ")"); |
---|
197 | break; |
---|
198 | case HandleResolver.TIMEOUT: |
---|
199 | result.reportWarning(line, column, |
---|
200 | "Timeout while resolving PID '" + uri + "'"); |
---|
201 | break; |
---|
202 | case HandleResolver.UNKNOWN_HOST: |
---|
203 | result.reportWarning(line, column, |
---|
204 | "Unable to resolve host '" + uri.getHost() + |
---|
205 | "' while resolving PID '" + uri + "'"); |
---|
206 | break; |
---|
207 | case HandleResolver.ERROR: |
---|
208 | result.reportWarning(line, column, |
---|
209 | "An error occurred while resolving PID '" + |
---|
210 | uri + "'"); |
---|
211 | break; |
---|
212 | default: |
---|
213 | result.reportWarning(-line, column, "PID '" + uri + |
---|
214 | "' resolved with an unexpected result (" + |
---|
215 | code + ")"); |
---|
216 | break; |
---|
217 | } // switch |
---|
218 | } catch (IOException e) { |
---|
219 | throw new CMDIValidatorException( |
---|
220 | "error while resolving handle '" + uri + "'", e); |
---|
221 | } |
---|
222 | } |
---|
223 | } |
---|
224 | |
---|
225 | } // CheckHandleExtension |
---|