Context Navigation

source: SRUAggregator/trunk/src/java/clarind/fcs/Harvester.java @ 2097

Last change on this file since 2097 was 2097, checked in by zastrow, 12 years ago

File size: 4.1 KB

Line
1	package clarind.fcs;
2
3	import java.io.InputStream;
4	import java.net.URL;
5	import java.util.*;
6	import javax.xml.parsers.DocumentBuilder;
7	import javax.xml.parsers.DocumentBuilderFactory;
8	import javax.xml.xpath.XPath;
9	import javax.xml.xpath.XPathConstants;
10	import javax.xml.xpath.XPathExpressionException;
11	import javax.xml.xpath.XPathFactory;
12	import org.w3c.dom.Node;
13	import org.w3c.dom.NodeList;
14
15	public class Harvester {
16
17	final String crStartpoint = "http://130.183.206.32/restxml/";
18
19	private NodeList evaluateXPath(String statement, org.w3c.dom.Document domtree){
20	NodeList result = null;
21
22	XPath xpath = XPathFactory.newInstance().newXPath();
23	try {
24	result = (NodeList) xpath.evaluate(statement, domtree, XPathConstants.NODESET);
25	} catch (XPathExpressionException ex) {
26	System.out.println(ex.getMessage());
27	}
28	return result;
29	}
30
31	public String evaluateXPathToString(String statement, org.w3c.dom.Document domtree) {
32	String result = null;
33
34	XPath xpath = XPathFactory.newInstance().newXPath();
35	try {
36	result = (String) xpath.evaluate(statement, domtree, XPathConstants.STRING);
37	} catch (XPathExpressionException ex) {
38	System.out.println(ex.getMessage());
39	}
40	return result;
41	}
42
43
44	public ArrayList<Endpoint> getEndpoints() throws Exception {
45	ArrayList<Endpoint> ep = new ArrayList<Endpoint>();
46
47	URL u = new URL(crStartpoint);
48	InputStream is = u.openStream();
49
50	DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
51	DocumentBuilder builder = factory.newDocumentBuilder();
52	org.w3c.dom.Document document = builder.parse(is);
53
54	is.close();
55	String instituteName = evaluateXPathToString("//Name", document);
56
57	NodeList institutionsUrls = evaluateXPath("//Center_id_link", document);
58
59	int i, i2;
60
61	for(i=0; i<institutionsUrls.getLength();i++){
62	u = new URL(institutionsUrls.item(i).getTextContent());
63	is = u.openStream();
64
65	org.w3c.dom.Document doc = builder.parse(is);
66	is.close();
67	////WebReference[./Description[text()="CQL"]]/Website
68
69	NodeList endpointsUrls = evaluateXPath("//WebReference[./Description[text()=\"CQL\"]]/Website", doc);
70
71	for(i2=0; i2<endpointsUrls.getLength();i2++){
72	String epUrl = endpointsUrls.item(i2).getTextContent();
73	ep.add(new Endpoint(epUrl, instituteName));
74	} // for i2
75
76	} // for i ...
77
78
79	return ep;
80	} //getEndpoints
81
82
83	public ArrayList<String> getCorporaOfAnEndpoint(String endpointUrl) throws Exception {
84	System.out.println("getCorporaOfAnEndpoint: " + endpointUrl);
85	ArrayList<String> corpora = new ArrayList<String>();
86
87	URL u = new URL(endpointUrl + "?operation=scan&scanClause=fcs.resource");
88	InputStream is = u.openStream();
89
90	DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
91	DocumentBuilder builder = factory.newDocumentBuilder();
92	org.w3c.dom.Document document = builder.parse(is);
93
94	is.close();
95
96	//http://clarinws.informatik.uni-leipzig.de:8080/CQL?
97
98	NodeList corporaNodes = evaluateXPath("//[local-name()='term']/[local-name()='value']", document);
99
100	int i, i2;
101
102	for(i=0; i<corporaNodes.getLength();i++){
103	corpora.add(corporaNodes.item(i).getTextContent());
104
105	} // for i ...
106	return corpora;
107	} // getCorporaOfAnEndpoint
108
109
110	public static void main (String[] args) throws Exception {
111	Harvester cr = new Harvester();
112	ArrayList<Endpoint> ep = cr.getEndpoints();
113
114	int i;
115
116	for(i=0; i<ep.size();i++){
117	System.out.println(ep.get(i).getInstitution() + " " + ep.get(i).getUrl());
118	} // for i ...
119
120
121	}
122
123
124
125
126	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: