1 | package eu.clarin.sru.fcs.aggregator.search; |
---|
2 | |
---|
3 | import eu.clarin.sru.client.fcs.DataViewHits; |
---|
4 | import eu.clarin.sru.fcs.aggregator.app.Aggregator; |
---|
5 | import eu.clarin.sru.fcs.aggregator.util.LanguagesISO693; |
---|
6 | import java.util.ArrayList; |
---|
7 | import java.util.List; |
---|
8 | import org.apache.commons.lang.StringEscapeUtils; |
---|
9 | |
---|
10 | /** |
---|
11 | * Represents keyword in context data view and information about its PID and |
---|
12 | * reference. |
---|
13 | * |
---|
14 | * @author Yana Panchenko |
---|
15 | */ |
---|
16 | public class Kwic { |
---|
17 | |
---|
18 | public static class TextFragment { |
---|
19 | |
---|
20 | String text; |
---|
21 | boolean isHit; |
---|
22 | |
---|
23 | public TextFragment(String text, boolean isHit) { |
---|
24 | this.text = text; |
---|
25 | this.isHit = isHit; |
---|
26 | } |
---|
27 | |
---|
28 | public String getText() { |
---|
29 | return text; |
---|
30 | } |
---|
31 | |
---|
32 | public boolean isHit() { |
---|
33 | return isHit; |
---|
34 | } |
---|
35 | |
---|
36 | @Override |
---|
37 | public String toString() { |
---|
38 | return (isHit ? "[" : "") + text + (isHit ? "]" : ""); |
---|
39 | } |
---|
40 | } |
---|
41 | |
---|
42 | private String pid; |
---|
43 | private String reference; |
---|
44 | private String language; |
---|
45 | private List<TextFragment> fragments = new ArrayList<TextFragment>(); |
---|
46 | |
---|
47 | public Kwic(DataViewHits hits, String pid, String reference) { |
---|
48 | this.pid = pid; |
---|
49 | this.reference = reference; |
---|
50 | |
---|
51 | // warning: the client library doesn't unescape the xml |
---|
52 | // so the text can still contains < and & codes |
---|
53 | String str = hits.getText(); |
---|
54 | |
---|
55 | int lastOffset = 0; |
---|
56 | for (int i = 0; i < hits.getHitCount(); i++) { |
---|
57 | int[] offsets = hits.getHitOffsets(i); |
---|
58 | if (lastOffset < offsets[0]) { |
---|
59 | String text = StringEscapeUtils.unescapeXml(str.substring(lastOffset, offsets[0])); |
---|
60 | fragments.add(new TextFragment(text, false)); |
---|
61 | } |
---|
62 | if (offsets[0] < offsets[1]) { |
---|
63 | String text = StringEscapeUtils.unescapeXml(str.substring(offsets[0], offsets[1])); |
---|
64 | fragments.add(new TextFragment(text, true)); |
---|
65 | } |
---|
66 | lastOffset = offsets[1]; |
---|
67 | } |
---|
68 | if (lastOffset < str.length()) { |
---|
69 | String text = StringEscapeUtils.unescapeXml(str.substring(lastOffset, str.length())); |
---|
70 | fragments.add(new TextFragment(text, false)); |
---|
71 | } |
---|
72 | |
---|
73 | String code_iso639_1 = Aggregator.getInstance().detectLanguage(str); |
---|
74 | language = code_iso639_1 == null ? null |
---|
75 | : LanguagesISO693.getInstance().code_3ForCode(code_iso639_1); |
---|
76 | } |
---|
77 | |
---|
78 | public List<TextFragment> getFragments() { |
---|
79 | return fragments; |
---|
80 | } |
---|
81 | |
---|
82 | public String getPid() { |
---|
83 | return pid; |
---|
84 | } |
---|
85 | |
---|
86 | public String getReference() { |
---|
87 | return reference; |
---|
88 | } |
---|
89 | |
---|
90 | public String getLanguage() { |
---|
91 | return language; |
---|
92 | } |
---|
93 | |
---|
94 | @Deprecated |
---|
95 | public String getLeft() { |
---|
96 | StringBuilder sb = new StringBuilder(); |
---|
97 | for (TextFragment tf : fragments) { |
---|
98 | if (tf.isHit) { |
---|
99 | break; |
---|
100 | } |
---|
101 | sb.append(tf.text); |
---|
102 | } |
---|
103 | return sb.toString(); |
---|
104 | } |
---|
105 | |
---|
106 | @Deprecated |
---|
107 | public String getKeyword() { |
---|
108 | for (TextFragment tf : fragments) { |
---|
109 | if (tf.isHit) { |
---|
110 | return tf.text; |
---|
111 | } |
---|
112 | } |
---|
113 | return ""; |
---|
114 | } |
---|
115 | |
---|
116 | @Deprecated |
---|
117 | public String getRight() { |
---|
118 | StringBuilder sb = new StringBuilder(); |
---|
119 | boolean pastHit = false; |
---|
120 | for (TextFragment tf : fragments) { |
---|
121 | if (pastHit) { |
---|
122 | sb.append(tf.text); |
---|
123 | } |
---|
124 | if (tf.isHit) { |
---|
125 | pastHit = true; |
---|
126 | } |
---|
127 | } |
---|
128 | return sb.toString(); |
---|
129 | } |
---|
130 | } |
---|