source: vlo/trunk/vlo-web-app/src/main/java/eu/clarin/cmdi/vlo/wicket/HighlightSearchTermScriptFactory.java @ 6695

Last change on this file since 6695 was 6695, checked in by Twan Goosen, 9 years ago

merged changes from 3.3 branch to trunk

File size: 3.7 KB
Line 
1/*
2 * Copyright (C) 2015 CLARIN
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17package eu.clarin.cmdi.vlo.wicket;
18
19import com.google.common.collect.ImmutableSet;
20import java.io.Serializable;
21import java.util.Collection;
22
23/**
24 *
25 * @author Twan Goosen <twan.goosen@mpi.nl>
26 */
27public class HighlightSearchTermScriptFactory implements Serializable {
28
29    public static final String HIGHLIGHT_FUNCTION = "$('%s').highlight(%s, {className:'%s', wordsOnly: %s})";
30
31    public static final Collection<String> DEFAULT_EXCLUDE_WORDS = ImmutableSet.of("and", "or", "not", "to");
32
33    public static final String DEFAULT_CSS_CLASS = "searchword";
34
35    public String createScript(String componentSelector, final String words) {
36        return createScript(componentSelector, words, DEFAULT_CSS_CLASS);
37    }
38
39    public String createScript(String componentSelector, final String words, String cssClass) {
40        return String.format(HIGHLIGHT_FUNCTION,
41                componentSelector,
42                makeWordListArray(words),
43                cssClass,
44                matchWordsOnly(words)
45        );
46    }
47
48    protected boolean matchWordsOnly(String query) {
49        // string with asterixes or question marks should match by character
50        return !query.matches(".*[\\*\\?].*");
51    }
52
53    /**
54     *
55     * @param wordList string of whitespace separated words
56     * @return a string representing a sanitised javascript array of words
57     */
58    private CharSequence makeWordListArray(String wordList) {
59        final StringBuilder sb = new StringBuilder("[");
60        final String[] words = wordList.split("\\s");
61        for (int i = 0; i < words.length; i++) {
62            final String word = sanitise(words[i]); //remove white space and quotes at beginning or end
63            // is on exclude list?
64            if (!getExcludeWords().contains(word.toLowerCase())) {
65                // wrap in quotes
66                sb.append("'").append(word).append("'");
67                if (i + 1 < words.length) {
68                    // prepare to append next
69                    sb.append(",");
70                }
71            }
72        }
73        return sb.append("]");
74    }
75
76    private String sanitise(String word) {
77        //remove everything up to first colon and strip off quotation marks and white space
78        return word.replaceAll(
79                //match beginning
80                "^("
81                //case with colon (also strip quotes + optional whitespace after quotes)
82                + "[^:\"']+:(['\"])?"
83                //or case without colon (strip quotes and white space)
84                + "|['\"]+"
85                + ")"
86                //match end
87                + "|("
88                //quotes
89                + "['\"]+"
90                //or boosting values
91                + "|['\"]?\\^.*"
92                + ")$"
93                //also remove wildcard characters
94                + "|[\\?\\*]",
95                //replace with empty string
96                "");
97    }
98
99    /**
100     *
101     * @return Words not to highlight
102     */
103    protected Collection<String> getExcludeWords() {
104        return DEFAULT_EXCLUDE_WORDS;
105    }
106}
Note: See TracBrowser for help on using the repository browser.