1 | /* |
---|
2 | * Copyright (C) 2015 CLARIN |
---|
3 | * |
---|
4 | * This program is free software: you can redistribute it and/or modify |
---|
5 | * it under the terms of the GNU General Public License as published by |
---|
6 | * the Free Software Foundation, either version 3 of the License, or |
---|
7 | * (at your option) any later version. |
---|
8 | * |
---|
9 | * This program is distributed in the hope that it will be useful, |
---|
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
12 | * GNU General Public License for more details. |
---|
13 | * |
---|
14 | * You should have received a copy of the GNU General Public License |
---|
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
16 | */ |
---|
17 | package eu.clarin.cmdi.vlo.wicket; |
---|
18 | |
---|
import com.google.common.collect.ImmutableSet;
import java.io.Serializable;
import java.util.Collection;
import java.util.Locale;
import java.util.regex.Pattern;
---|
22 | |
---|
23 | /** |
---|
24 | * |
---|
25 | * @author Twan Goosen <twan.goosen@mpi.nl> |
---|
26 | */ |
---|
27 | public class HighlightSearchTermScriptFactory implements Serializable { |
---|
28 | |
---|
29 | public static final String HIGHLIGHT_FUNCTION = "$('%s').highlight(%s, {className:'%s', wordsOnly: %s})"; |
---|
30 | |
---|
31 | public static final Collection<String> DEFAULT_EXCLUDE_WORDS = ImmutableSet.of("and", "or", "not", "to"); |
---|
32 | |
---|
33 | public static final String DEFAULT_CSS_CLASS = "searchword"; |
---|
34 | |
---|
35 | public String createScript(String componentSelector, final String words) { |
---|
36 | return createScript(componentSelector, words, DEFAULT_CSS_CLASS); |
---|
37 | } |
---|
38 | |
---|
39 | public String createScript(String componentSelector, final String words, String cssClass) { |
---|
40 | return String.format(HIGHLIGHT_FUNCTION, |
---|
41 | componentSelector, |
---|
42 | makeWordListArray(words), |
---|
43 | cssClass, |
---|
44 | matchWordsOnly(words) |
---|
45 | ); |
---|
46 | } |
---|
47 | |
---|
48 | protected boolean matchWordsOnly(String query) { |
---|
49 | // string with asterixes or question marks should match by character |
---|
50 | return !query.matches(".*[\\*\\?].*"); |
---|
51 | } |
---|
52 | |
---|
53 | /** |
---|
54 | * |
---|
55 | * @param wordList string of whitespace separated words |
---|
56 | * @return a string representing a sanitised javascript array of words |
---|
57 | */ |
---|
58 | private CharSequence makeWordListArray(String wordList) { |
---|
59 | final StringBuilder sb = new StringBuilder("["); |
---|
60 | final String[] words = wordList.split("\\s"); |
---|
61 | for (int i = 0; i < words.length; i++) { |
---|
62 | final String word = sanitise(words[i]); //remove white space and quotes at beginning or end |
---|
63 | // is on exclude list? |
---|
64 | if (!getExcludeWords().contains(word.toLowerCase())) { |
---|
65 | // wrap in quotes |
---|
66 | sb.append("'").append(word).append("'"); |
---|
67 | if (i + 1 < words.length) { |
---|
68 | // prepare to append next |
---|
69 | sb.append(","); |
---|
70 | } |
---|
71 | } |
---|
72 | } |
---|
73 | return sb.append("]"); |
---|
74 | } |
---|
75 | |
---|
76 | private String sanitise(String word) { |
---|
77 | //remove everything up to first colon and strip off quotation marks and white space |
---|
78 | return word.replaceAll( |
---|
79 | //match beginning |
---|
80 | "^(" |
---|
81 | //case with colon (also strip quotes + optional whitespace after quotes) |
---|
82 | + "[^:\"']+:(['\"])?" |
---|
83 | //or case without colon (strip quotes and white space) |
---|
84 | + "|['\"]+" |
---|
85 | + ")" |
---|
86 | //match end |
---|
87 | + "|(" |
---|
88 | //quotes |
---|
89 | + "['\"]+" |
---|
90 | //or boosting values |
---|
91 | + "|['\"]?\\^.*" |
---|
92 | + ")$" |
---|
93 | //also remove wildcard characters |
---|
94 | + "|[\\?\\*]", |
---|
95 | //replace with empty string |
---|
96 | ""); |
---|
97 | } |
---|
98 | |
---|
99 | /** |
---|
100 | * |
---|
101 | * @return Words not to highlight |
---|
102 | */ |
---|
103 | protected Collection<String> getExcludeWords() { |
---|
104 | return DEFAULT_EXCLUDE_WORDS; |
---|
105 | } |
---|
106 | } |
---|