1 | package eu.clarin.cmdi.vlo.dao; |
---|
2 | |
---|
3 | import eu.clarin.cmdi.vlo.config.VloConfig; |
---|
4 | import java.net.MalformedURLException; |
---|
5 | import java.util.Arrays; |
---|
6 | import java.util.HashSet; |
---|
7 | import java.util.Set; |
---|
8 | import org.apache.solr.client.solrj.SolrQuery; |
---|
9 | import org.apache.solr.client.solrj.SolrServerException; |
---|
10 | import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; |
---|
11 | import org.apache.solr.client.solrj.response.QueryResponse; |
---|
12 | import org.apache.solr.client.solrj.util.ClientUtils; |
---|
13 | import org.apache.solr.common.SolrDocument; |
---|
14 | import org.apache.solr.common.SolrDocumentList; |
---|
15 | import org.slf4j.Logger; |
---|
16 | import org.slf4j.LoggerFactory; |
---|
17 | |
---|
18 | public class SolrDao { |
---|
19 | |
---|
20 | private final static Logger LOG = LoggerFactory.getLogger(SolrDao.class); |
---|
21 | private final CommonsHttpSolrServer solrServer; |
---|
22 | |
---|
23 | public SolrDao() { |
---|
24 | String solrUrl; |
---|
25 | solrUrl = VloConfig.getSolrUrl(); |
---|
26 | try { |
---|
27 | solrServer = new CommonsHttpSolrServer(solrUrl); |
---|
28 | } catch (MalformedURLException e) { |
---|
29 | throw new RuntimeException("URL: " + solrUrl, e); |
---|
30 | } |
---|
31 | } |
---|
32 | |
---|
33 | protected CommonsHttpSolrServer getSolrserver() { |
---|
34 | return solrServer; |
---|
35 | } |
---|
36 | |
---|
37 | /** |
---|
38 | * Basic sanitising of Solr queries. |
---|
39 | * |
---|
40 | * Query is based on the URL to the VLO web application. Also, explain |
---|
41 | * about the URL and ?fq=language:dutch |
---|
42 | * Assume filters have the form a:b |
---|
43 | * like for example language:dutch |
---|
44 | * |
---|
45 | * @param query |
---|
46 | * @return |
---|
47 | */ |
---|
48 | private SolrQuery sanitise (SolrQuery query){ |
---|
49 | |
---|
50 | // String [] facetsFromConfig; |
---|
51 | |
---|
52 | // try and get the filters facets from the query |
---|
53 | String [] filtersInQuery; |
---|
54 | filtersInQuery = query.getFilterQueries(); |
---|
55 | |
---|
56 | if (filtersInQuery == null) { |
---|
57 | // the query does not contain filters |
---|
58 | } else { |
---|
59 | // get the facets from the configuration file |
---|
60 | // facetsFromConfig = VloConfig.getFacetFields(); |
---|
61 | |
---|
62 | // present the facets from the config file as a list to a new set |
---|
63 | Set<String> facetsDefined; |
---|
64 | facetsDefined = new HashSet<String>(Arrays.asList(VloConfig.getFacetFields())); |
---|
65 | |
---|
66 | // check the filters in the query by name |
---|
67 | for (String filter : filtersInQuery) { |
---|
68 | // split up a filter, look at the string preceeding the semicolon |
---|
69 | String facetInFilter = filter.split(":") [0]; |
---|
70 | |
---|
71 | if (facetsDefined.contains(facetInFilter)) { |
---|
72 | // facet in the filter is in the set that is defined by the config file |
---|
73 | } else { |
---|
74 | if (facetInFilter.startsWith("_")) { |
---|
75 | // this facet is hidden, do not consider it |
---|
76 | } else { |
---|
77 | // the filter name does not match a facet in the facet |
---|
78 | query.removeFilterQuery(filter); |
---|
79 | } |
---|
80 | } |
---|
81 | } |
---|
82 | } |
---|
83 | |
---|
84 | // finally, return the sanitised query |
---|
85 | return query; |
---|
86 | } |
---|
87 | |
---|
88 | protected QueryResponse fireQuery(SolrQuery query) { |
---|
89 | SolrQuery sanitisedQuery; |
---|
90 | sanitisedQuery = sanitise(query); |
---|
91 | try { |
---|
92 | return solrServer.query(sanitisedQuery); |
---|
93 | } catch (SolrServerException e) { |
---|
94 | LOG.error("Error getting data:", e); |
---|
95 | throw new RuntimeException(e); |
---|
96 | } |
---|
97 | } |
---|
98 | |
---|
99 | public SolrDocument getSolrDocument(String docId) { |
---|
100 | SolrDocument result = null; |
---|
101 | SolrQuery query = new SolrQuery(); |
---|
102 | query.setQuery("id:" + ClientUtils.escapeQueryChars(docId)); |
---|
103 | query.setFields("*"); |
---|
104 | SolrDocumentList docs = fireQuery(query).getResults(); |
---|
105 | if (docs.getNumFound() > 1) { |
---|
106 | LOG.error("Error: found multiple documents for id (will return first one): " + docId + " \nDocuments found: " + docs); |
---|
107 | result = docs.get(0); |
---|
108 | } else if (docs.getNumFound() == 1) { |
---|
109 | result = docs.get(0); |
---|
110 | } |
---|
111 | return result; |
---|
112 | } |
---|
113 | } |
---|