source: SRUClient/trunk/src/main/java/eu/clarin/sru/client/fcs/DataViewParserKWIC.java @ 5743

Last change on this file since 5743 was 5743, checked in by Oliver Schonefeld, 10 years ago
  • more work-in-progress
File size: 3.6 KB
Line 
1/**
2 * This software is copyright (c) 2012-2013 by
3 *  - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
4 * This is free software. You can redistribute it
5 * and/or modify it under the terms described in
6 * the GNU General Public License v3 of which you
7 * should have received a copy. Otherwise you can download
8 * it from
9 *
10 *   http://www.gnu.org/licenses/gpl-3.0.txt
11 *
12 * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
13 *
14 * @license http://www.gnu.org/licenses/gpl-3.0.txt
15 *  GNU General Public License v3
16 */
17package eu.clarin.sru.client.fcs;
18
19import javax.xml.stream.XMLStreamException;
20import javax.xml.stream.XMLStreamReader;
21
22import org.slf4j.Logger;
23import org.slf4j.LoggerFactory;
24
25import eu.clarin.sru.client.SRUClientException;
26import eu.clarin.sru.client.XmlStreamReaderUtils;
27
28public class DataViewParserKWIC implements DataViewParser {
29    private static final String FCS_KWIC_NS = "http://clarin.eu/fcs/1.0/kwic";
30    private static final String KWIC_LEGACY_TYPE = "kwic";
31    private static final String KWIC_TYPE =
32            "application/x-clarin-fcs-kwic+xml";
33    private static final Logger logger =
34            LoggerFactory.getLogger(DataViewParserKWIC.class);
35
36    @Override
37    public boolean acceptType(String type) {
38        return KWIC_TYPE.equals(type) || KWIC_LEGACY_TYPE.equals(type);
39    }
40
41    @Override
42    public int getPriority() {
43        return 1000;
44    }
45
46    @Override
47    public DataView parse(XMLStreamReader reader, String type, String pid,
48            String ref) throws XMLStreamException, SRUClientException {
49        if (KWIC_LEGACY_TYPE.equals(type)) {
50            logger.warn("type '" + KWIC_LEGACY_TYPE + "' is deprecated " +
51                    "for a KWIC <DataView>, please use '" +
52                    KWIC_TYPE + "' instead");
53        }
54        String left = null;
55        String keyword = null;
56        String right = null;
57
58        XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "kwic", true);
59        if (XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "c", false)) {
60            left = XmlStreamReaderUtils.readString(reader, false);
61            XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "c");
62        }
63        keyword = XmlStreamReaderUtils.readContent(reader, FCS_KWIC_NS, "kw", true);
64        if (XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "c", false)) {
65            right = XmlStreamReaderUtils.readString(reader, false);
66            XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "c");
67        }
68        XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "kwic");
69
70        logger.debug("left='{}' keyword='{}', right='{}'",
71                left, keyword, right);
72
73        logger.warn("Upgraded deprecated KWIC dataview to HITS dataview. " +
74                "Please upgrade to the new CLARIN-FCS specification " +
75                "as soon as possible.");
76        final int[] offsets    = new int[3];
77        final StringBuilder sb = new StringBuilder();
78        if (left != null) {
79            sb.append(left);
80            if (!Character.isWhitespace(sb.charAt(sb.length() - 1))) {
81                sb.append(" ");
82            }
83        }
84        offsets[0] = sb.length();
85        sb.append(keyword);
86        offsets[1] = sb.length();
87        if (right != null) {
88            if (!Character.isWhitespace(sb.charAt(sb.length() - 1))) {
89                sb.append(" ");
90            }
91            sb.append(right);
92        }
93        return new DataViewHits(pid, ref, sb.toString(), offsets, 3);
94    }
95
96} // class DataViewParserKWIC
Note: See TracBrowser for help on using the repository browser.