Context Navigation

ExtraTermDataXPath.java

Last change on this file was 6059, checked in by olhsha@mpi.nl, 9 years ago
Importing the MPI endpoint, with an example of using JAXB to generate Java classes from schemata and of using the Guava library for caching.
File size: 12.3 KB

Line
1	/*
2	* To change this template, choose Tools | Templates
3	* and open the template in the editor.
4	*/
5	package nl.mpi.annot.search.cqlsearch2;
6
7	import java.io.IOException;
8	import java.io.InputStream;
9	import java.net.MalformedURLException;
10	import java.net.URL;
11	import java.net.URLConnection;
12	import java.util.ArrayList;
13	import java.util.HashMap;
14	import java.util.Hashtable;
15	import javax.xml.parsers.DocumentBuilder;
16	import javax.xml.parsers.DocumentBuilderFactory;
17	import javax.xml.parsers.ParserConfigurationException;
18	import javax.xml.xpath.XPath;
19	import javax.xml.xpath.XPathConstants;
20	import javax.xml.xpath.XPathExpressionException;
21	import javax.xml.xpath.XPathFactory;
22	import nl.mpi.corpusstructure.CorpusStructureDBImpl;
23	import nl.mpi.corpusstructure.Node;
24	import nl.mpi.corpusstructure.UnknownNodeException;
25	import nl.mpi.imdi.api.IMDIDom;
26	import nl.mpi.util.OurURL;
27	import org.slf4j.Logger;
28	import org.slf4j.LoggerFactory;
29	import org.w3c.dom.Document;
30	import org.w3c.dom.NodeList;
31	import org.xml.sax.SAXException;
32
33	/**
34	*
35	* @author olhsha
36	*/
37	public class ExtraTermData {
38
39	private HashMap<String, String> _nodeTitle=null; // to do; make it a hash-map: language->title
40	private String _landingPageURL = null;
41	private ArrayList<String> _languages = null;
42	private boolean _hasSubResources = false;
43	private HashMap<String, String> _descriptions = null;
44	private Logger _logger = LoggerFactory.getLogger(ExtraTermData.class);
45
46	public ExtraTermData(CorpusStructureDBImpl db, String nodeId, String user) {
47
48	try {
49	_landingPageURL = db.getObjectURI(nodeId).toString();
50	Node[] nodes = db.getChildrenNodes(nodeId);
51	_hasSubResources = (nodes.length > 0);
52
53	// find the catalogue
54	int catalogueIndex = catalogueIsFound(nodes);
55	if (catalogueIndex > -1) {
56	setExtraTermDataFromCatalogue(db, nodes[catalogueIndex]);
57	}
58	else {
59	setEmptyExtraTermData();
60	}
61
62	} catch (UnknownNodeException e) {
63	_logger.error(nodeId + " is an unknown node", e);
64	}
65
66	}
67
68	private int catalogueIsFound(Node[] nodes) {
69	for (int i = 0; i < nodes.length; i++) {
70	if (nodes[i].getNodeType() == Node.CATALOGUE) {
71	return i;
72	}
73	}
74	return -1;
75	}
76
77	private void setExtraTermDataFromCatalogue(CorpusStructureDBImpl db, Node catalogue) {
78	URL catalogueURL = null;
79	// InputStream catalogueStream = null;
80	// DocumentBuilder docBuilder = null;
81	// URLConnection con = null;
82
83	try {
84	catalogueURL = db.getObjectURI(catalogue.getNodeId()).toURL();
85	} catch (MalformedURLException catalogueURIException) {
86	_logger.error("Cannot translate catalogue's URI " + db.getObjectURI(catalogue.getNodeId()) + " into an URL", catalogueURIException);
87	_logger.error("Extra term data stay at their default empty setting");
88	return;
89	}
90
91	IMDIDom dom = new IMDIDom();
92	Hashtable hasTable = null;
93	Document doc = null;
94
95	try {
96	doc = dom.loadIMDIDocument(new OurURL(catalogueURL), false);
97	hasTable=(dom.getKeyValuePairs(doc, "Catalogue"));
98	} catch (MalformedURLException e){
99	_logger.error("Cannot turn " + catalogueURL+" into OurURL", e);
100	_logger.error("Extra term data stay at their default empty setting");
101	return;
102	}
103
104	/*
105	try {
106	con = catalogueURL.openConnection();
107	con.setConnectTimeout(5000);
108	con.setReadTimeout(5000);
109	catalogueStream = con.getInputStream();
110	} catch (IOException openCatalogueException) {
111	_logger.error("Cannot get strean form the catalogue's URL " + catalogueURL, openCatalogueException);
112	_logger.error("Extra term data stay at their default empty setting");
113	return;
114	}
115
116	try {
117	DocumentBuilderFactory fac = DocumentBuilderFactory.newInstance();
118	docBuilder = fac.newDocumentBuilder();
119	} catch (ParserConfigurationException parserE) {
120	_logger.error("Cannot create parser for the catalogue " + catalogueURL, parserE);
121	_logger.error("Extra term data stay at their default empty setting");
122	return;
123	}
124
125	try {
126	doc = docBuilder.parse(catalogueStream);
127	catalogueStream.close();
128	} catch (SAXException saxE) {
129	_logger.error("Cannot parse the catalogues stream obtained from " + catalogueURL, saxE);
130	_logger.error("Extra term data stay at their default empty setting");
131	return;
132	} catch (IOException ioE) {
133	_logger.error("Cannot parse the catalogue's stream obtained from " + catalogueURL, ioE);
134	_logger.error("Extra term data stay at their default empty setting");
135	return;
136	} */
137
138
139	//XPathFactory factory = XPathFactory.newInstance();
140	_nodeTitle = getTitles(hasTable, doc, dom);
141	_languages = null;
142	_descriptions = null;
143
144	//_languages = getLanguages(factory, doc, catalogueURL);
145	//_descriptions = getDescriptions(factory, doc, catalogueURL);
146	}
147
148	//private HashMap<String, String> getTitles(XPathFactory factory, Document doc, URL catalogueURL) {
149
150	private HashMap<String, String> getTitles(Hashtable hashTable, Document doc, IMDIDom dom) {
151
152	HashMap<String, String> result = new HashMap<String, String>();
153
154	String englishTitle = hashTable.get("EnglishTitle").toString();
155	result.put(englishTitle, "eng");
156
157	String otherTitles = (dom.getIMDIElement(doc, "Catalogue.Title")).getValue();
158	//int amountOfOtherTitles = (otherTitles == null) ? 0 : otherTitles.getLength();
159
160	String otherLanguages = hashTable.get("TitleLanguage").toString();
161	//int amountOfOtherLanguages = (otherLanguages == null) ? 0 : otherLanguages.getLength();
162
163	/*if (amountOfOtherTitles != amountOfOtherLanguages) {
164	_logger.error("The amount of titles does not match the amount of title languages in the catalogue");
165	}
166	for (int i = 0; i < amountOfOtherTitles; i++) {
167	result.put(otherTitles.item(i).getTextContent(), otherLanguages.item(i).getTextContent());
168	}*/
169	result.put(otherTitles, otherLanguages);
170	return result;
171	}
172
173	/////
174	// private String getEnglishTitle(XPathFactory factory, Document doc, URL catalogueURL) {
175	/*
176	NodeList xmlNodes = null;
177	XPath xpath = factory.newXPath();
178	String xpathEnglishTitle = "/METATRANSCRIPT/Catalogue/Keys/Key[@Name=\"EnglishTitle\"]";
179	try {
180	xmlNodes = (NodeList) xpath.evaluate(xpathEnglishTitle, doc, XPathConstants.NODESET);
181	} catch (XPathExpressionException xpathE) {
182	_logger.error("Cannot parse the English Title obtained from the catalogue " + catalogueURL, xpathE);
183	_logger.error("At least one title must be given according to FCS specification");
184	}
185
186	String result = ((xmlNodes != null) ? (xmlNodes.getLength() > 0 ? xmlNodes.item(0).getTextContent() : "No English title is given") : "No English title is given");
187
188	*/
189	/* IMDIDom dom = new IMDIDom();
190	try {
191	Document docNew = dom.loadIMDIDocument(new OurURL(catalogueURL), false);
192	Hashtable tmpTable=(dom.getKeyValuePairs(docNew, "Catalogue"));
193	String result = tmpTable.get("EnglishTitle").toString();
194	return result;
195	} catch (MalformedURLException e){
196	_logger.error("Cannot parse the English Title obtained from the catalogue " + catalogueURL, e);
197	}
198
199	return " ";
200	}*/
201
202	/////
203	/*private NodeList getOtherTitles(Hashtable hashtable) {
204	NodeList result = null;
205	XPath xpathOthers = factory.newXPath();
206	String xpathTitle = "/METATRANSCRIPT/Catalogue/Title";
207	try {
208	result = (NodeList) xpathOthers.evaluate(xpathTitle, doc, XPathConstants.NODESET);
209	} catch (XPathExpressionException xpathE) {
210	_logger.error("Cannot parse the Title obtained from the catalogue " + catalogueURL, xpathE);
211	}
212
213
214	return result;
215	}*/
216
217	////
218	/*private NodeList getLanguagesOfOtherTitles(Hashtable hashTable) {
219	NodeList result = null;
220	XPath xpathOthersLanguages = factory.newXPath();
221	String xpathTitleLanguages = "/METATRANSCRIPT/Catalogue/Keys/Key[@Name=\"TitleLanguage\"]";
222	try {
223	result = (NodeList) xpathOthersLanguages.evaluate(xpathTitleLanguages, doc, XPathConstants.NODESET);
224	} catch (XPathExpressionException xpathE) {
225	_logger.error("Cannot parse the title language obtained from the catalogue " + catalogueURL, xpathE);
226	}
227
228	return result;
229	}*/
230
231	/////////////
232	private ArrayList<String> getLanguages(XPathFactory factory, Document doc, URL catalogueURL) {
233	XPath xpath = factory.newXPath();
234	String xpathLanguages = "/METATRANSCRIPT/Catalogue/SubjectLanguages/Language/Id";
235	try {
236	NodeList xmlNodes = (NodeList) xpath.evaluate(xpathLanguages, doc, XPathConstants.NODESET);
237	int lengthLanguages = (xmlNodes == null) ? 0 : xmlNodes.getLength();
238	if (lengthLanguages == 0) {
239	_logger.error("At least one language must be given according to FCs specification");
240	return null;
241	}
242	ArrayList<String> result = new ArrayList<String>();
243	for (int i = 0; i < lengthLanguages; i++) {
244	result.add(CQLHelpers.removeISO369prefix(xmlNodes.item(i).getTextContent()));
245	}
246	return result;
247	} catch (XPathExpressionException xpathE) {
248	_logger.error("Cannot parse the languages in the catalogue " + catalogueURL, xpathE);
249	_logger.error("At least one language must be given according to FCs specification");
250	}
251	return null;
252	}
253
254	private HashMap<String, String> getDescriptions(XPathFactory factory, Document doc, URL catalogueURL) {
255	XPath xpath = factory.newXPath();
256	String xpathDescriptions = "/METATRANSCRIPT/Catalogue/Description";
257	try {
258	NodeList xmlNodes = (NodeList) xpath.evaluate(xpathDescriptions, doc, XPathConstants.NODESET);
259	int lengthDescriptions = (xmlNodes == null) ? 0 : xmlNodes.getLength();
260	if (lengthDescriptions == 0) {
261	_logger.info("There are no descriptions for this corpus");
262	return null;
263	}
264	HashMap<String, String> result = new HashMap<String, String>();
265	String currentLanguage = null;
266	String currentDescriptionText = null;
267	for (int i = 0; i < lengthDescriptions; i++) {
268	currentLanguage = CQLHelpers.removeISO369prefix(xmlNodes.item(i).getAttributes().getNamedItem("LanguageId").getTextContent());
269	currentDescriptionText = xmlNodes.item(i).getTextContent();
270	result.put(currentLanguage, currentDescriptionText);
271	}
272	return result;
273	} catch (XPathExpressionException xpathE) {
274	_logger.info("Cannot parse the descriptions obtained from the catalogue " + catalogueURL, xpathE);
275	_logger.info("There will be no descriptions for this corpus");
276	}
277	return null;
278	}
279
280	private void setEmptyExtraTermData(){
281	_nodeTitle = new HashMap<String, String>();
282	_nodeTitle.put("eng", "No extra-term data are provided for non-root searchabale corpora");
283	_landingPageURL = "??";
284	_languages = new ArrayList<String>();
285	_languages.add("??");
286	_hasSubResources=false;
287	_descriptions = new HashMap<String, String>();
288	_descriptions.put("??", "??");
289	}
290
291	public HashMap<String, String> getNodeTitle(){
292	return _nodeTitle;
293	}
294
295	public String getLandingPageURL(){
296	return _landingPageURL;
297	}
298
299	public ArrayList<String> getLanguages(){
300	return _languages;
301	}
302
303	public boolean getHasSubResources(){
304	return _hasSubResources;
305	}
306
307
308	public HashMap<String, String> getDescriptions(){
309	return _descriptions;
310	}
311	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: FederatedSearch/mpi-endpoint/ExtraTermDataXPath.java

Download in other formats: