Context Navigation

source: SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/search/Search.java @ 5900

Last change on this file since 5900 was 5900, checked in by emanuel.dima@uni-tuebingen.de, 9 years ago
alpha 9: corpus view UI improvements, bug fixes
File size: 6.8 KB

Line
1	package eu.clarin.sru.fcs.aggregator.search;
2
3	import eu.clarin.sru.client.SRUVersion;
4	import java.util.List;
5	import eu.clarin.sru.client.SRUClientException;
6	import eu.clarin.sru.client.SRUSearchRetrieveRequest;
7	import eu.clarin.sru.client.SRUSearchRetrieveResponse;
8	import eu.clarin.sru.client.fcs.ClarinFCSRecordData;
9	import eu.clarin.sru.fcs.aggregator.client.ThrottledClient;
10	import eu.clarin.sru.fcs.aggregator.scan.Corpus;
11	import eu.clarin.sru.fcs.aggregator.scan.Statistics;
12	import eu.clarin.sru.fcs.aggregator.util.SRUCQL;
13	import java.util.ArrayList;
14	import java.util.Collections;
15	import java.util.Random;
16	import java.util.concurrent.atomic.AtomicLong;
17	import java.util.logging.Level;
18	import java.util.logging.Logger;
19	import opennlp.tools.tokenize.TokenizerModel;
20	import org.slf4j.LoggerFactory;
21
22	/**
23	* Class representing a search operation
24	*
25	* @author Yana Panchenko
26	* @author edima
27	*/
28	public class Search {
29
30	private static final org.slf4j.Logger log = LoggerFactory.getLogger(Search.class);
31
32	private static final String SEARCH_RESULTS_ENCODING = "UTF-8";
33
34	private static final AtomicLong counter = new AtomicLong(Math.abs(new Random().nextInt()));
35
36	private final Long id;
37	private final String searchLanguage;
38	private final List<Request> requests = Collections.synchronizedList(new ArrayList<Request>());
39	private final List<Result> results = Collections.synchronizedList(new ArrayList<Result>());
40	private final Statistics statistics;
41
42	public Search(ThrottledClient searchClient, SRUVersion version,
43	Statistics statistics, List<Corpus> corpora,
44	String searchString, String searchLanguage, int startRecord, int maxRecords
45	) {
46	this.id = counter.getAndIncrement();
47	this.searchLanguage = searchLanguage;
48	this.statistics = statistics;
49	for (Corpus corpus : corpora) {
50	executeSearch(searchClient, version, corpus, searchString, startRecord, maxRecords);
51	}
52	}
53
54	private Request executeSearch(ThrottledClient searchClient, SRUVersion version, final Corpus corpus, String searchString, int startRecord, int maxRecords) {
55	final Request request = new Request(corpus, searchString, startRecord, startRecord + maxRecords - 1);
56	log.info("Executing search in '{}' query='{}' maxRecords='{}'", corpus, searchString, maxRecords);
57
58	SRUSearchRetrieveRequest searchRequest = new SRUSearchRetrieveRequest(corpus.getEndpointUrl());
59	searchRequest.setVersion(version);
60	searchRequest.setMaximumRecords(maxRecords);
61	searchRequest.setRecordSchema(ClarinFCSRecordData.RECORD_SCHEMA);
62	searchRequest.setQuery("\"" + searchString + "\"");
63	searchRequest.setStartRecord(startRecord);
64	if (request.hasCorpusHandle()) {
65	searchRequest.setExtraRequestData(SRUCQL.SEARCH_CORPUS_HANDLE_PARAMETER, corpus.getHandle());
66	}
67	requests.add(request);
68
69	try {
70	searchClient.searchRetrieve(searchRequest, new ThrottledClient.SearchCallback() {
71	@Override
72	public void onSuccess(SRUSearchRetrieveResponse response, ThrottledClient.Stats stats) {
73	statistics.addEndpointDatapoint(corpus.getInstitution(), corpus.getEndpointUrl(), stats.getQueueTime(), stats.getExecutionTime());
74	results.add(new Result(request, response, null));
75	requests.remove(request);
76	}
77
78	@Override
79	public void onError(SRUSearchRetrieveRequest srureq, SRUClientException xc, ThrottledClient.Stats stats) {
80	statistics.addEndpointDatapoint(corpus.getInstitution(), corpus.getEndpointUrl(), stats.getQueueTime(), stats.getExecutionTime());
81	statistics.addErrorDatapoint(corpus.getInstitution(), corpus.getEndpointUrl(), xc);
82	results.add(new Result(request, null, xc));
83	requests.remove(request);
84	}
85	});
86	} catch (Throwable xc) {
87	log.error("SearchRetrieve error for " + corpus.getEndpointUrl(), xc);
88	}
89	return request;
90	}
91
92	public Long getId() {
93	return id;
94	}
95
96	public List<Request> getRequests() {
97	List<Request> copy = new ArrayList<>();
98	synchronized (requests) {
99	copy.addAll(requests);
100	}
101	return copy;
102	}
103
104	public List<Result> getResults() {
105	List<Result> copy = new ArrayList<>();
106	synchronized (results) {
107	copy.addAll(results);
108	}
109	return copy;
110	}
111
112	public Statistics getStatistics() {
113	return statistics;
114	}
115
116	public void exportTCF(TokenizerModel tokenizerModel) throws ExportException {
117	byte[] bytes = Exports.getExportTokenizedTCF(results, searchLanguage, tokenizerModel);
118	if (bytes != null) {
119	Filedownload.save(bytes, "text/tcf+xml", "ClarinDFederatedContentSearch.xml");
120	}
121	}
122
123	public void exportText() {
124	String text = Exports.getExportText(results);
125	if (text != null) {
126	Filedownload.save(text, "text/plain", "ClarinDFederatedContentSearch.txt");
127	}
128	}
129
130	void exportExcel() throws ExportException {
131	byte[] bytes = Exports.getExportExcel(results);
132	if (bytes != null) {
133	Filedownload.save(bytes, "text/tcf+xml", "ClarinDFederatedContentSearch.xls");
134	}
135	}
136
137	void exportPWText(String user, String pass) {
138	byte[] bytes = null;
139	try {
140	String text = Exports.getExportText(results);
141	if (text != null) {
142	bytes = text.getBytes(SEARCH_RESULTS_ENCODING);
143	}
144	} catch (Exception ex) {
145	Logger.getLogger(Search.class.getName()).log(Level.SEVERE, null, ex);
146	}
147	if (bytes != null) {
148	DataTransfer.uploadToPW(user, pass, bytes, "text/plan", ".txt");
149	}
150	}
151
152	String useWebLichtOnText() {
153	String url = null;
154	try {
155	String text = Exports.getExportText(results);
156	if (text != null) {
157	byte[] bytes = text.getBytes(SEARCH_RESULTS_ENCODING);
158	url = DataTransfer.uploadToDropOff(bytes, "text/plan", ".txt");
159	}
160	} catch (Exception ex) {
161	Logger.getLogger(Search.class.getName()).log(Level.SEVERE, null, ex);
162	}
163	return url;
164	}
165
166	String useWebLichtOnToks(TokenizerModel tokenizerModel) throws ExportException {
167	String url = null;
168	byte[] bytes = Exports.getExportTokenizedTCF(results, searchLanguage, tokenizerModel);
169	if (bytes != null) {
170	url = DataTransfer.uploadToDropOff(bytes, "text/tcf+xml", ".tcf");
171	}
172	return url;
173	}
174
175	void exportPWExcel(String user, String pass) throws ExportException {
176	byte[] bytes = Exports.getExportExcel(results);
177	if (bytes != null) {
178	DataTransfer.uploadToPW(user, pass, bytes, "application/vnd.ms-excel", ".xls");
179	}
180	}
181
182	public void exportPWTCF(String user, String pass, TokenizerModel tokenizerModel) throws ExportException {
183	byte[] bytes = Exports.getExportTokenizedTCF(results, searchLanguage, tokenizerModel);
184	if (bytes != null) {
185	DataTransfer.uploadToPW(user, pass, bytes, "text/tcf+xml", ".tcf");
186	}
187	}
188
189	public void exportCSV() {
190	String csv = Exports.getExportCSV(results, ";");
191	if (csv != null) {
192	Filedownload.save(csv, "text/plain", "ClarinDFederatedContentSearch.csv");
193	}
194	}
195
196	public void exportPWCSV(String user, String pass) {
197	String csv = Exports.getExportCSV(results, ";");
198	if (csv != null) {
199	DataTransfer.uploadToPW(user, pass, csv.getBytes(), "text/csv", ".csv");
200	}
201	}
202
203	public void shutdown() {
204	// nothing to do
205	}
206	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: