source: vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIDigester.java @ 992

Last change on this file since 992 was 992, checked in by paucas, 13 years ago
  • changed minor lowercasing spelling
File size: 6.0 KB
Line 
1package eu.clarin.cmdi.vlo.importer;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.IOException;
6import java.util.List;
7
8import javax.xml.parsers.DocumentBuilder;
9import javax.xml.parsers.DocumentBuilderFactory;
10import javax.xml.parsers.ParserConfigurationException;
11import javax.xml.xpath.XPath;
12import javax.xml.xpath.XPathConstants;
13import javax.xml.xpath.XPathExpressionException;
14import javax.xml.xpath.XPathFactory;
15
16import org.slf4j.Logger;
17import org.slf4j.LoggerFactory;
18import org.w3c.dom.Document;
19import org.w3c.dom.Node;
20import org.w3c.dom.NodeList;
21import org.xml.sax.InputSource;
22import org.xml.sax.SAXException;
23
24public class CMDIDigester {
25    private final static Logger LOG = LoggerFactory.getLogger(CMDIDigester.class);
26    private final FacetMapping facetMapping;
27    //    private XMLReader xmlReader;
28    private DocumentBuilder builder;
29
30    public CMDIDigester(FacetMapping facetMapping) {
31        this.facetMapping = facetMapping;
32        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
33        domFactory.setNamespaceAware(true);
34        try {
35            builder = domFactory.newDocumentBuilder();
36        } catch (ParserConfigurationException e) {
37            throw new RuntimeException("Cannot instantiate documentBuilder:", e);
38        }
39        //        try {
40        //            xmlReader = createXmlReader();
41        //        } catch (SAXException e) {
42        //            throw new RuntimeException("Cannot instantiate xmlReader:", e);
43        //        }
44    }
45
46    public CMDIData process(File file) throws IOException, SAXException, XPathExpressionException {
47        CMDIData result = null;
48        InputSource inputSource = new InputSource(new FileInputStream(file));
49        inputSource.setSystemId(file.toString());
50        XPath xpath = XPathFactory.newInstance().newXPath();
51        result = createCMDIData(xpath, inputSource);
52
53        /**
54         * Do not reuse the digester it holds state on bad parses. We can reuse the xmlReader. Creating a new Digester or reusing an
55         * instance gives similar performance.
56         * @see org.apache.commons.digester.Digester
57         */
58        //result = (CMDIData) createDigester().parse(inputSource);
59        return result;
60    }
61
62    private CMDIData createCMDIData(XPath xpath, InputSource inputSource) throws XPathExpressionException, SAXException, IOException {
63        CMDIData result = new CMDIData();
64        Document doc = builder.parse(inputSource);
65        Node node = (Node) xpath.evaluate(facetMapping.getIdMapping(), doc, XPathConstants.NODE);
66        if (node != null) {
67            result.setId(node.getNodeValue());
68        }
69        NodeList nodes = (NodeList) xpath.evaluate("CMD/Resources/ResourceProxyList/ResourceProxy", doc, XPathConstants.NODESET);
70        for (int i = 0; i < nodes.getLength(); i++) {
71            Node resourceNode = nodes.item(i);
72            Node ref = (Node) xpath.evaluate("ResourceRef/text()", resourceNode, XPathConstants.NODE);
73            Node type = (Node) xpath.evaluate("ResourceType/text()", resourceNode, XPathConstants.NODE);
74            if (ref != null && type != null) {
75                result.addResource(ref.getNodeValue(), type.getNodeValue());
76            }
77        }
78        List<FacetConfiguration> facetList = facetMapping.getFacets();
79        for (FacetConfiguration facetConfiguration : facetList) {
80            matchDocumentField(result, facetConfiguration, doc, xpath);
81        }
82        return result;
83    }
84
85    private void matchDocumentField(CMDIData result, FacetConfiguration facetConfig, Document doc, XPath xpath)
86            throws XPathExpressionException {
87        NodeList nodes = (NodeList) xpath.evaluate(facetConfig.getPattern(), doc, XPathConstants.NODESET);
88        if (nodes != null) {
89            for (int i = 0; i < nodes.getLength(); i++) {
90                result.addDocField(facetConfig.getName(), nodes.item(i).getNodeValue(), facetConfig.isCaseInsensitive());
91            }
92        } // else do nothing it is perfectly acceptable that not all data is in a cmdi file so not everything will be matched. E.G xpath expression evaluation CMDI session files will never match on CMD corpus files.
93    }
94
95    //    private Digester createDigester() {
96    //        Digester digester = new Digester(xmlReader);
97    //        digester.setValidating(false);
98    //        digester.addObjectCreate("CMD", CMDIData.class);
99    //        digester.addBeanPropertySetter(facetMapping.getIdMapping(), "id");
100    //        digester.addCallMethod("CMD/Resources/ResourceProxyList/ResourceProxy/", "addResource", 2);
101    //        digester.addCallParam("CMD/Resources/ResourceProxyList/ResourceProxy/ResourceRef", 0);
102    //        digester.addCallParam("CMD/Resources/ResourceProxyList/ResourceProxy/ResourceType", 1);
103    //        //        Map<String, String> facetMap = facetMapping.getFacetMap();
104    //        //        for (String facet : facetMap.keySet()) {
105    //        //            matchDocumentField(digester, facetMap.get(facet), facet);
106    //        //        }
107    //        return digester;
108    //    }
109    //
110    //    private void matchDocumentField(Digester digester, String pattern, String fieldName) {
111    //        String[] split = pattern.split(",@", 2);
112    //        String path = split[0];
113    //        String attribute = split.length == 2 ? split[1] : null;
114    //        digester.addCallMethod(path, "addDocField", 2);
115    //        digester.addObjectParam(path, 0, fieldName);
116    //        digester.addCallParam(path, 1, attribute);
117    //    }
118    //
119    //    private XMLReader createXmlReader() throws SAXException {
120    //        XMLReader xmlReader = XMLReaderFactory.createXMLReader();
121    //        xmlReader.setFeature("http://xml.org/sax/features/validation", true);
122    //        xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);
123    //        xmlReader.setProperty("http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema");
124    //        return xmlReader;
125    //    }
126
127}
Note: See TracBrowser for help on using the repository browser.