source: SRUClient/tags/SRUClient-0.9.5/src/main/java/eu/clarin/sru/client/fcs/DataViewParserHits.java @ 6079

Last change on this file since 6079 was 6079, checked in by Oliver Schonefeld, 9 years ago
  • tag version 0.9.5
  • Property svn:eol-style set to native
File size: 3.4 KB
Line 
1/**
2 * This software is copyright (c) 2012-2014 by
3 *  - Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
4 * This is free software. You can redistribute it
5 * and/or modify it under the terms described in
6 * the GNU General Public License v3 of which you
7 * should have received a copy. Otherwise you can download
8 * it from
9 *
10 *   http://www.gnu.org/licenses/gpl-3.0.txt
11 *
12 * @copyright Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
13 *
14 * @license http://www.gnu.org/licenses/gpl-3.0.txt
15 *  GNU General Public License v3
16 */
17package eu.clarin.sru.client.fcs;
18
19import java.util.Arrays;
20
21import javax.xml.stream.XMLStreamException;
22import javax.xml.stream.XMLStreamReader;
23
24import org.slf4j.Logger;
25import org.slf4j.LoggerFactory;
26
27import eu.clarin.sru.client.SRUClientException;
28import eu.clarin.sru.client.XmlStreamReaderUtils;
29
30
31/**
32 * An implementation of a Data View parser that parses HITS Data Views. This
33 * parser expects input that conforms to the CLARIN-FCS specification for the
34 * HITS Data View.
35 *
36 * @see DataViewHits
37 */
38public final class DataViewParserHits implements DataViewParser {
39    private static final int OFFSET_CHUNK_SIZE = 8;
40    private static final String FCS_HITS_NS =
41            "http://clarin.eu/fcs/dataview/hits";
42    private static final Logger logger =
43            LoggerFactory.getLogger(DataViewParserHits.class);
44
45
46    @Override
47    public boolean acceptType(String type) {
48        return DataViewHits.TYPE.equals(type);
49    }
50
51
52    @Override
53    public int getPriority() {
54        return 1000;
55    }
56
57
58    @Override
59    public DataView parse(XMLStreamReader reader, String type, String pid,
60            String ref) throws XMLStreamException, SRUClientException {
61        int offsets[] = new int[OFFSET_CHUNK_SIZE];
62        int offsets_idx = 0;
63        StringBuilder buffer = new StringBuilder();
64        XmlStreamReaderUtils.readStart(reader, FCS_HITS_NS, "Result", true);
65
66        int idx = 0;
67        while (!XmlStreamReaderUtils.peekEnd(reader, FCS_HITS_NS, "Result")) {
68            if (buffer.length() > 0) {
69                if (!Character.isWhitespace(buffer.charAt(buffer.length() - 1))) {
70                    buffer.append(' ');
71                }
72                idx = buffer.length();
73            }
74
75            if (XmlStreamReaderUtils.readStart(reader, FCS_HITS_NS, "Hit", false)) {
76                String hit = XmlStreamReaderUtils.readString(reader, false);
77                XmlStreamReaderUtils.readEnd(reader, FCS_HITS_NS, "Hit");
78                if (hit.length() > 0) {
79                    buffer.append(hit);
80                    if (offsets_idx == offsets.length) {
81                        offsets = Arrays.copyOf(offsets, offsets.length + 8);
82                    }
83                    /*
84                     * add pair of offsets and simultaneously increase index
85                     */
86                    offsets[offsets_idx++] = idx;
87                    offsets[offsets_idx++] = idx + hit.length();
88                } else {
89                    logger.warn("skipping empty <Hit> element within <Result> element");
90                }
91            } else {
92                buffer.append(XmlStreamReaderUtils.readString(reader, false));
93            }
94        } // while
95        XmlStreamReaderUtils.readEnd(reader, FCS_HITS_NS, "Result");
96
97        final String text = buffer.toString();
98        return new DataViewHits(pid, ref, text, offsets, offsets_idx);
99    }
100
101} // class DataViewParserHits
Note: See TracBrowser for help on using the repository browser.