source: SRUClient/tags/SRUClient-0.9.5/src/main/java/eu/clarin/sru/client/fcs/DataViewParserKWIC.java @ 6079

Last change on this file since 6079 was 6079, checked in by Oliver Schonefeld, 9 years ago
  • tag version 0.9.5
File size: 4.0 KB
Line 
1/**
2 * This software is copyright (c) 2012-2014 by
3 *  - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
4 * This is free software. You can redistribute it
5 * and/or modify it under the terms described in
6 * the GNU General Public License v3 of which you
7 * should have received a copy. Otherwise you can download
8 * it from
9 *
10 *   http://www.gnu.org/licenses/gpl-3.0.txt
11 *
12 * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
13 *
14 * @license http://www.gnu.org/licenses/gpl-3.0.txt
15 *  GNU General Public License v3
16 */
17package eu.clarin.sru.client.fcs;
18
19import javax.xml.stream.XMLStreamException;
20import javax.xml.stream.XMLStreamReader;
21
22import org.slf4j.Logger;
23import org.slf4j.LoggerFactory;
24
25import eu.clarin.sru.client.SRUClientException;
26import eu.clarin.sru.client.XmlStreamReaderUtils;
27
28
29/**
30 * An implementation of a Data View parser that parses legacy KWIC Data Views.
31 * The input will automatically be upgraded to a HITS Data View and an instance
32 * of {@link DataViewHits} will be returned.
33 *
34 * @see DataViewHits
35 * @deprecated Use only to talk to legacy clients. Endpoints should upgrade to
36 *             recent CLARIN-FCS specification.
37 */
38@Deprecated
39public class DataViewParserKWIC implements DataViewParser {
40    private static final String FCS_KWIC_NS = "http://clarin.eu/fcs/1.0/kwic";
41    private static final String KWIC_LEGACY_TYPE = "kwic";
42    private static final String KWIC_TYPE =
43            "application/x-clarin-fcs-kwic+xml";
44    private static final Logger logger =
45            LoggerFactory.getLogger(DataViewParserKWIC.class);
46
47    @Override
48    public boolean acceptType(String type) {
49        return KWIC_TYPE.equals(type) || KWIC_LEGACY_TYPE.equals(type);
50    }
51
52    @Override
53    public int getPriority() {
54        return 1000;
55    }
56
57    @Override
58    public DataView parse(XMLStreamReader reader, String type, String pid,
59            String ref) throws XMLStreamException, SRUClientException {
60        if (KWIC_LEGACY_TYPE.equals(type)) {
61            logger.warn("type '" + KWIC_LEGACY_TYPE + "' is deprecated " +
62                    "for a KWIC <DataView>, please use '" +
63                    KWIC_TYPE + "' instead");
64        }
65        String left = null;
66        String keyword = null;
67        String right = null;
68
69        XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "kwic", true);
70        if (XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "c", false)) {
71            left = XmlStreamReaderUtils.readString(reader, false);
72            XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "c");
73        }
74        keyword = XmlStreamReaderUtils.readContent(reader, FCS_KWIC_NS, "kw", true);
75        if (XmlStreamReaderUtils.readStart(reader, FCS_KWIC_NS, "c", false)) {
76            right = XmlStreamReaderUtils.readString(reader, false);
77            XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "c");
78        }
79        XmlStreamReaderUtils.readEnd(reader, FCS_KWIC_NS, "kwic");
80
81        logger.debug("left='{}' keyword='{}', right='{}'",
82                left, keyword, right);
83
84        logger.warn("Upgraded deprecated KWIC dataview to HITS dataview. " +
85                "Please upgrade to the new CLARIN-FCS specification " +
86                "as soon as possible.");
87        final int[] offsets    = new int[3];
88        final StringBuilder sb = new StringBuilder();
89        if (left != null) {
90            sb.append(left);
91            if (!Character.isWhitespace(sb.charAt(sb.length() - 1))) {
92                sb.append(" ");
93            }
94        }
95        offsets[0] = sb.length();
96        sb.append(keyword);
97        offsets[1] = sb.length();
98        if (right != null) {
99            if (!Character.isWhitespace(sb.charAt(sb.length() - 1))) {
100                sb.append(" ");
101            }
102            sb.append(right);
103        }
104        return new DataViewHits(pid, ref, sb.toString(), offsets, 3);
105    }
106
107} // class DataViewParserKWIC
Note: See TracBrowser for help on using the repository browser.