Context Navigation

source: SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/search/Search.java @ 5957

Last change on this file since 5957 was 5957, checked in by emanuel.dima@uni-tuebingen.de, 9 years ago
alpha15: added support for new spec explains/search; misc UI corrections
File size: 7.1 KB

Line
1	package eu.clarin.sru.fcs.aggregator.search;
2
3	import eu.clarin.sru.client.SRUVersion;
4	import java.util.List;
5	import eu.clarin.sru.client.SRUClientException;
6	import eu.clarin.sru.client.SRUSearchRetrieveRequest;
7	import eu.clarin.sru.client.SRUSearchRetrieveResponse;
8	import eu.clarin.sru.client.fcs.ClarinFCSRecordData;
9	import eu.clarin.sru.fcs.aggregator.client.ThrottledClient;
10	import eu.clarin.sru.fcs.aggregator.scan.Corpus;
11	import eu.clarin.sru.fcs.aggregator.scan.FCSProtocolVersion;
12	import eu.clarin.sru.fcs.aggregator.scan.Statistics;
13	import eu.clarin.sru.fcs.aggregator.util.SRUCQL;
14	import java.util.ArrayList;
15	import java.util.Collections;
16	import java.util.Random;
17	import java.util.concurrent.atomic.AtomicLong;
18	import java.util.logging.Level;
19	import java.util.logging.Logger;
20	import opennlp.tools.tokenize.TokenizerModel;
21	import org.slf4j.LoggerFactory;
22
23	/**
24	* Class representing a search operation
25	*
26	* @author Yana Panchenko
27	* @author edima
28	*/
29	public class Search {
30
31	private static final org.slf4j.Logger log = LoggerFactory.getLogger(Search.class);
32
33	private static final String SEARCH_RESULTS_ENCODING = "UTF-8";
34
35	private static final AtomicLong counter = new AtomicLong(Math.abs(new Random().nextInt()));
36
37	private final Long id;
38	private final long createdAt = System.currentTimeMillis();
39	private final String searchLanguage;
40	private final List<Request> requests = Collections.synchronizedList(new ArrayList<Request>());
41	private final List<Result> results = Collections.synchronizedList(new ArrayList<Result>());
42	private final Statistics statistics;
43
44	public Search(ThrottledClient searchClient, SRUVersion version,
45	Statistics statistics, List<Corpus> corpora, String searchString,
46	String searchLanguage, int startRecord, int maxRecords
47	) {
48	this.id = counter.getAndIncrement();
49	this.searchLanguage = searchLanguage;
50	this.statistics = statistics;
51	for (Corpus corpus : corpora) {
52	executeSearch(searchClient, version, corpus, searchString, startRecord, maxRecords);
53	}
54	}
55
56	private Request executeSearch(ThrottledClient searchClient, SRUVersion version, final Corpus corpus, String searchString, int startRecord, int maxRecords) {
57	final Request request = new Request(corpus, searchString, startRecord, startRecord + maxRecords - 1);
58	log.info("Executing search in '{}' query='{}' maxRecords='{}'", corpus, searchString, maxRecords);
59
60	SRUSearchRetrieveRequest searchRequest = new SRUSearchRetrieveRequest(corpus.getEndpoint().getUrl());
61	searchRequest.setVersion(version);
62	searchRequest.setMaximumRecords(maxRecords);
63	// searchRequest.setRecordSchema(
64	// corpus.getEndpoint().getProtocol().equals(FCSProtocolVersion.LEGACY)
65	// ? ClarinFCSRecordData.LEGACY_RECORD_SCHEMA
66	// : ClarinFCSRecordData.RECORD_SCHEMA);
67	searchRequest.setQuery("\"" + searchString + "\"");
68	searchRequest.setStartRecord(startRecord);
69	if (corpus.getHandle() != null) {
70	searchRequest.setExtraRequestData(SRUCQL.SEARCH_CORPUS_HANDLE_PARAMETER, corpus.getHandle());
71	}
72	requests.add(request);
73
74	try {
75	searchClient.searchRetrieve(searchRequest, new ThrottledClient.SearchCallback() {
76	@Override
77	public void onSuccess(SRUSearchRetrieveResponse response, ThrottledClient.Stats stats) {
78	statistics.addEndpointDatapoint(corpus.getInstitution(), corpus.getEndpoint().getUrl(), stats.getQueueTime(), stats.getExecutionTime());
79	results.add(new Result(request, response, null));
80	requests.remove(request);
81	}
82
83	@Override
84	public void onError(SRUSearchRetrieveRequest srureq, SRUClientException xc, ThrottledClient.Stats stats) {
85	statistics.addEndpointDatapoint(corpus.getInstitution(), corpus.getEndpoint().getUrl(), stats.getQueueTime(), stats.getExecutionTime());
86	statistics.addErrorDatapoint(corpus.getInstitution(), corpus.getEndpoint().getUrl(), xc);
87	results.add(new Result(request, null, xc));
88	requests.remove(request);
89	}
90	});
91	} catch (Throwable xc) {
92	log.error("SearchRetrieve error for " + corpus.getEndpoint().getUrl(), xc);
93	}
94	return request;
95	}
96
97	public Long getId() {
98	return id;
99	}
100
101	public List<Request> getRequests() {
102	List<Request> copy = new ArrayList<>();
103	synchronized (requests) {
104	copy.addAll(requests);
105	}
106	return copy;
107	}
108
109	public List<Result> getResults() {
110	List<Result> copy = new ArrayList<>();
111	synchronized (results) {
112	copy.addAll(results);
113	}
114	return copy;
115	}
116
117	public Statistics getStatistics() {
118	return statistics;
119	}
120
121	public void exportTCF(TokenizerModel tokenizerModel) throws ExportException {
122	byte[] bytes = Exports.getExportTokenizedTCF(results, searchLanguage, tokenizerModel);
123	if (bytes != null) {
124	Filedownload.save(bytes, "text/tcf+xml", "ClarinDFederatedContentSearch.xml");
125	}
126	}
127
128	public void exportText() {
129	String text = Exports.getExportText(results);
130	if (text != null) {
131	Filedownload.save(text, "text/plain", "ClarinDFederatedContentSearch.txt");
132	}
133	}
134
135	void exportExcel() throws ExportException {
136	byte[] bytes = Exports.getExportExcel(results);
137	if (bytes != null) {
138	Filedownload.save(bytes, "text/tcf+xml", "ClarinDFederatedContentSearch.xls");
139	}
140	}
141
142	void exportPWText(String user, String pass) {
143	byte[] bytes = null;
144	try {
145	String text = Exports.getExportText(results);
146	if (text != null) {
147	bytes = text.getBytes(SEARCH_RESULTS_ENCODING);
148	}
149	} catch (Exception ex) {
150	Logger.getLogger(Search.class.getName()).log(Level.SEVERE, null, ex);
151	}
152	if (bytes != null) {
153	DataTransfer.uploadToPW(user, pass, bytes, "text/plan", ".txt");
154	}
155	}
156
157	String useWebLichtOnText() {
158	String url = null;
159	try {
160	String text = Exports.getExportText(results);
161	if (text != null) {
162	byte[] bytes = text.getBytes(SEARCH_RESULTS_ENCODING);
163	url = DataTransfer.uploadToDropOff(bytes, "text/plan", ".txt");
164	}
165	} catch (Exception ex) {
166	Logger.getLogger(Search.class.getName()).log(Level.SEVERE, null, ex);
167	}
168	return url;
169	}
170
171	String useWebLichtOnToks(TokenizerModel tokenizerModel) throws ExportException {
172	String url = null;
173	byte[] bytes = Exports.getExportTokenizedTCF(results, searchLanguage, tokenizerModel);
174	if (bytes != null) {
175	url = DataTransfer.uploadToDropOff(bytes, "text/tcf+xml", ".tcf");
176	}
177	return url;
178	}
179
180	void exportPWExcel(String user, String pass) throws ExportException {
181	byte[] bytes = Exports.getExportExcel(results);
182	if (bytes != null) {
183	DataTransfer.uploadToPW(user, pass, bytes, "application/vnd.ms-excel", ".xls");
184	}
185	}
186
187	public void exportPWTCF(String user, String pass, TokenizerModel tokenizerModel) throws ExportException {
188	byte[] bytes = Exports.getExportTokenizedTCF(results, searchLanguage, tokenizerModel);
189	if (bytes != null) {
190	DataTransfer.uploadToPW(user, pass, bytes, "text/tcf+xml", ".tcf");
191	}
192	}
193
194	public void exportCSV() {
195	String csv = Exports.getExportCSV(results, ";");
196	if (csv != null) {
197	Filedownload.save(csv, "text/plain", "ClarinDFederatedContentSearch.csv");
198	}
199	}
200
201	public void exportPWCSV(String user, String pass) {
202	String csv = Exports.getExportCSV(results, ";");
203	if (csv != null) {
204	DataTransfer.uploadToPW(user, pass, csv.getBytes(), "text/csv", ".csv");
205	}
206	}
207
208	public void shutdown() {
209	// nothing to do
210	}
211
212	public long getCreatedAt() {
213	return createdAt;
214	}
215	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: