source: vlo/trunk/vlo_web_app/src/main/java/eu/clarin/cmdi/vlo/dao/SolrDao.java @ 4182

Last change on this file since 4182 was 4182, checked in by keeloo, 10 years ago

First part of solr server input sanitising. Preferably to be included in the 2.18 release.

File size: 3.9 KB
Line 
1package eu.clarin.cmdi.vlo.dao;
2
3import eu.clarin.cmdi.vlo.config.VloConfig;
4import java.net.MalformedURLException;
5import java.util.Arrays;
6import java.util.HashSet;
7import java.util.Set;
8import org.apache.solr.client.solrj.SolrQuery;
9import org.apache.solr.client.solrj.SolrServerException;
10import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
11import org.apache.solr.client.solrj.response.QueryResponse;
12import org.apache.solr.client.solrj.util.ClientUtils;
13import org.apache.solr.common.SolrDocument;
14import org.apache.solr.common.SolrDocumentList;
15import org.slf4j.Logger;
16import org.slf4j.LoggerFactory;
17
18public class SolrDao {
19
20    private final static Logger LOG = LoggerFactory.getLogger(SolrDao.class);
21    private final CommonsHttpSolrServer solrServer;
22
23    public SolrDao() {
24        String solrUrl;
25        solrUrl = VloConfig.getSolrUrl();
26        try {
27            solrServer = new CommonsHttpSolrServer(solrUrl);
28        } catch (MalformedURLException e) {
29            throw new RuntimeException("URL: " + solrUrl, e);
30        }
31    }
32
33    protected CommonsHttpSolrServer getSolrserver() {
34        return solrServer;
35    }
36   
37    /**
38     * Basic sanitising of Solr queries.
39     *
40     * Query is based on the URL to the VLO web application. Also, explain
41     * about the URL and ?fq=language:dutch
42     * Assume filters have the form a:b
43     * like for example language:dutch
44     *
45     * @param query
46     * @return
47     */
48    private SolrQuery sanitise (SolrQuery query){
49       
50        // String [] facetsFromConfig;
51       
52        // try and get the filters facets from the query
53        String [] filtersInQuery;
54        filtersInQuery = query.getFilterQueries();
55 
56        if (filtersInQuery == null) {
57            // the query does not contain filters
58        } else {
59            // get the facets from the configuration file
60            // facetsFromConfig = VloConfig.getFacetFields();
61
62            // present the facets from the config file as a list to a new set
63            Set<String> facetsDefined;
64            facetsDefined = new HashSet<String>(Arrays.asList(VloConfig.getFacetFields()));
65
66            // check the filters in the query by name
67            for (String filter : filtersInQuery) {
68                // split up a filter, look at the string preceeding the semicolon
69                String facetInFilter = filter.split(":") [0];
70               
71                if (facetsDefined.contains(facetInFilter)) {
72                    // facet in the filter is in the set that is defined by the config file
73                } else {
74                    if (facetInFilter.startsWith("_")) {
75                        // this facet is hidden, do not consider it
76                    } else {
77                        // the filter name does not match a facet in the facet
78                        query.removeFilterQuery(filter);
79                    }
80                }
81            }
82        }
83
84        // finally, return the sanitised query
85        return query;
86    }
87
88    protected QueryResponse fireQuery(SolrQuery query) {
89        SolrQuery sanitisedQuery;
90        sanitisedQuery = sanitise(query);
91        try {
92            return solrServer.query(sanitisedQuery);
93        } catch (SolrServerException e) {
94            LOG.error("Error getting data:", e);
95            throw new RuntimeException(e);
96        }
97    }
98
99    public SolrDocument getSolrDocument(String docId) {
100        SolrDocument result = null;
101        SolrQuery query = new SolrQuery();
102        query.setQuery("id:" + ClientUtils.escapeQueryChars(docId));
103        query.setFields("*");
104        SolrDocumentList docs = fireQuery(query).getResults();
105        if (docs.getNumFound() > 1) {
106            LOG.error("Error: found multiple documents for id (will return first one): " + docId + " \nDocuments found: " + docs);
107            result = docs.get(0);
108        } else if (docs.getNumFound() == 1) {
109            result = docs.get(0);
110        }
111        return result;
112    }
113}
Note: See TracBrowser for help on using the repository browser.