Context Navigation

source: SRUAggregator/trunk/src/java/clarind/fcs/Harvester.java @ 2107

Last change on this file since 2107 was 2107, checked in by zastrow, 12 years ago

File size: 5.1 KB

Line
1	package clarind.fcs;
2
3	import java.io.InputStream;
4	import java.net.URL;
5	import java.util.*;
6	import javax.xml.parsers.DocumentBuilder;
7	import javax.xml.parsers.DocumentBuilderFactory;
8	import javax.xml.xpath.XPath;
9	import javax.xml.xpath.XPathConstants;
10	import javax.xml.xpath.XPathExpressionException;
11	import javax.xml.xpath.XPathFactory;
12	import org.w3c.dom.Node;
13	import org.w3c.dom.NodeList;
14
15	public class Harvester {
16
17	final String crStartpoint = "http://130.183.206.32/restxml/";
18
19	private NodeList evaluateXPath(String statement, org.w3c.dom.Document domtree) {
20	NodeList result = null;
21
22	XPath xpath = XPathFactory.newInstance().newXPath();
23	try {
24	result = (NodeList) xpath.evaluate(statement, domtree, XPathConstants.NODESET);
25	} catch (XPathExpressionException ex) {
26	System.out.println(ex.getMessage());
27	}
28	return result;
29	}
30
31	public String evaluateXPathToString(String statement, org.w3c.dom.Document domtree) {
32	String result = null;
33
34	XPath xpath = XPathFactory.newInstance().newXPath();
35	try {
36	result = (String) xpath.evaluate(statement, domtree, XPathConstants.STRING);
37	} catch (XPathExpressionException ex) {
38	System.out.println(ex.getMessage());
39	}
40	return result;
41	}
42
43	public ArrayList<Endpoint> getEndpoints() throws Exception {
44	ArrayList<Endpoint> ep = new ArrayList<Endpoint>();
45
46	URL u = new URL(crStartpoint);
47	InputStream is = u.openStream();
48
49	DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
50	DocumentBuilder builder = factory.newDocumentBuilder();
51	org.w3c.dom.Document document = builder.parse(is);
52
53	is.close();
54	String instituteName = evaluateXPathToString("//Name", document);
55
56	NodeList institutionsUrls = evaluateXPath("//Center_id_link", document);
57
58	int i, i2;
59
60	for (i = 0; i < institutionsUrls.getLength(); i++) {
61	u = new URL(institutionsUrls.item(i).getTextContent());
62	is = u.openStream();
63
64	org.w3c.dom.Document doc = builder.parse(is);
65	is.close();
66	////WebReference[./Description[text()="CQL"]]/Website
67
68	NodeList endpointsUrls = evaluateXPath("//WebReference[./Description[text()=\"CQL\"]]/Website", doc);
69
70	for (i2 = 0; i2 < endpointsUrls.getLength(); i2++) {
71	String epUrl = endpointsUrls.item(i2).getTextContent();
72	ep.add(new Endpoint(epUrl, instituteName));
73	} // for i2
74
75	} // for i ...
76
77
78	return ep;
79	} //getEndpoints
80
81	public ArrayList<Corpus> getCorporaOfAnEndpoint(String endpointUrl) throws Exception {
82
83	ArrayList<Corpus> corpora = new ArrayList<Corpus>();
84	String urlToCall = endpointUrl + "?operation=scan&scanClause=fcs.resource&version=1.2";
85	URL u = new URL(urlToCall);
86
87	System.out.println("getCorporaOfAnEndpoint: " + urlToCall);
88
89	InputStream is = u.openStream();
90
91	DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
92	DocumentBuilder builder = factory.newDocumentBuilder();
93	org.w3c.dom.Document document = builder.parse(is);
94
95	is.close();
96
97	//http://clarinws.informatik.uni-leipzig.de:8080/CQL?
98
99	//NodeList corporaNodes = evaluateXPath("//[local-name()='term']/[local-name()='value']", document);
100	NodeList corporaNodes = evaluateXPath("//[local-name()='terms']/[local-name()='term']", document);
101
102	int i, i2;
103	if (corporaNodes.getLength() > 0) {
104
105	System.out.println("Length of corpora: " + corporaNodes.getLength());
106
107	for (i = 0; i < corporaNodes.getLength(); i++) {
108	Node n = corporaNodes.item(i);
109
110	System.out.println("NODENAEM: " + n.getNodeName());
111
112	Corpus c = new Corpus();
113
114	for (i2 = 0; i2 < n.getChildNodes().getLength(); i2++) {
115	Node child = n.getChildNodes().item(i2);
116
117	if (child.getNodeName().endsWith("value")) {
118	c.setValue(child.getTextContent());
119	}
120
121	if (child.getNodeName().endsWith("displayTerm")) {
122	c.setDisplayTerm(child.getTextContent());
123	}
124
125	if (child.getNodeName().endsWith("numberOfRecords")) {
126	c.setNumberOfRecords(child.getTextContent());
127	}
128
129	} //for i2
130
131	corpora.add(c);
132
133	} // for i ...
134	} // if coporaNodes ...
135
136	System.out.println("------------");
137	return corpora;
138	} // getCorporaOfAnEndpoint
139
140	public static void main(String[] args) throws Exception {
141	Harvester cr = new Harvester();
142	ArrayList<Endpoint> ep = cr.getEndpoints();
143
144	int i;
145
146	for (i = 0; i < ep.size(); i++) {
147	System.out.println(ep.get(i).getInstitution() + " " + ep.get(i).getUrl());
148	} // for i ...
149
150
151	}
152	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: