1 | package eu.clarin.sru.fcs.aggregator.search; |
---|
2 | |
---|
3 | import eu.clarin.sru.client.fcs.DataViewHits; |
---|
4 | import eu.clarin.sru.fcs.aggregator.app.Aggregator; |
---|
5 | import eu.clarin.sru.fcs.aggregator.lang.LanguagesISO693_3; |
---|
6 | import java.util.ArrayList; |
---|
7 | import java.util.List; |
---|
8 | |
---|
9 | /** |
---|
10 | * Represents keyword in context data view and information about its PID and |
---|
11 | * reference. |
---|
12 | * |
---|
13 | * @author Yana Panchenko |
---|
14 | */ |
---|
15 | public class Kwic { |
---|
16 | |
---|
17 | public static class TextFragment { |
---|
18 | |
---|
19 | String text; |
---|
20 | boolean isHit; |
---|
21 | |
---|
22 | public TextFragment(String text, boolean isHit) { |
---|
23 | this.text = text; |
---|
24 | this.isHit = isHit; |
---|
25 | } |
---|
26 | |
---|
27 | public String getText() { |
---|
28 | return text; |
---|
29 | } |
---|
30 | |
---|
31 | public boolean isHit() { |
---|
32 | return isHit; |
---|
33 | } |
---|
34 | |
---|
35 | @Override |
---|
36 | public String toString() { |
---|
37 | return (isHit ? "[" : "") + text + (isHit ? "]" : ""); |
---|
38 | } |
---|
39 | } |
---|
40 | |
---|
41 | private String pid; |
---|
42 | private String reference; |
---|
43 | private String language; |
---|
44 | private List<TextFragment> fragments = new ArrayList<TextFragment>(); |
---|
45 | |
---|
46 | public Kwic(DataViewHits hits, String pid, String reference) { |
---|
47 | this.pid = pid; |
---|
48 | this.reference = reference; |
---|
49 | |
---|
50 | String text = hits.getText(); |
---|
51 | int lastOffset = 0; |
---|
52 | for (int i = 0; i < hits.getHitCount(); i++) { |
---|
53 | int[] offsets = hits.getHitOffsets(i); |
---|
54 | if (lastOffset < offsets[0]) { |
---|
55 | fragments.add(new TextFragment(text.substring(lastOffset, offsets[0]), false)); |
---|
56 | } |
---|
57 | if (offsets[0] < offsets[1]) { |
---|
58 | fragments.add(new TextFragment(text.substring(offsets[0], offsets[1]), true)); |
---|
59 | } |
---|
60 | lastOffset = offsets[1]; |
---|
61 | } |
---|
62 | if (lastOffset < text.length()) { |
---|
63 | fragments.add(new TextFragment(text.substring(lastOffset, text.length()), false)); |
---|
64 | } |
---|
65 | |
---|
66 | String code_iso639_1 = Aggregator.getInstance().detectLanguage(hits.getText()); |
---|
67 | language = code_iso639_1 == null ? null |
---|
68 | : LanguagesISO693_3.getInstance().code_3ForCode_1(code_iso639_1); |
---|
69 | } |
---|
70 | |
---|
71 | public List<TextFragment> getFragments() { |
---|
72 | return fragments; |
---|
73 | } |
---|
74 | |
---|
75 | public String getPid() { |
---|
76 | return pid; |
---|
77 | } |
---|
78 | |
---|
79 | public String getReference() { |
---|
80 | return reference; |
---|
81 | } |
---|
82 | |
---|
83 | public String getLanguage() { |
---|
84 | return language; |
---|
85 | } |
---|
86 | |
---|
87 | @Deprecated |
---|
88 | public String getLeft() { |
---|
89 | for (TextFragment tf : fragments) { |
---|
90 | if (!tf.isHit) { |
---|
91 | return tf.text; |
---|
92 | } |
---|
93 | } |
---|
94 | return ""; |
---|
95 | } |
---|
96 | |
---|
97 | @Deprecated |
---|
98 | public String getKeyword() { |
---|
99 | for (TextFragment tf : fragments) { |
---|
100 | if (tf.isHit) { |
---|
101 | return tf.text; |
---|
102 | } |
---|
103 | } |
---|
104 | return ""; |
---|
105 | } |
---|
106 | |
---|
107 | @Deprecated |
---|
108 | public String getRight() { |
---|
109 | StringBuilder sb = new StringBuilder(); |
---|
110 | boolean pastHit = false; |
---|
111 | for (TextFragment tf : fragments) { |
---|
112 | if (pastHit) { |
---|
113 | sb.append(tf.text); |
---|
114 | } |
---|
115 | if (tf.isHit) { |
---|
116 | pastHit = true; |
---|
117 | } |
---|
118 | } |
---|
119 | return sb.toString(); |
---|
120 | } |
---|
121 | } |
---|