/**
 * This software is copyright (c) 2013-2022 by
 *  - Leibniz-Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
 * This is free software. You can redistribute it
 * and/or modify it under the terms described in
 * the GNU General Public License v3 of which you
 * should have received a copy. Otherwise you can download
 * it from
 *
 *   http://www.gnu.org/licenses/gpl-3.0.txt
 *
 * @copyright Leibniz-Institut fuer Deutsche Sprache (http://www.ids-mannheim.de)
 *
 * @license http://www.gnu.org/licenses/gpl-3.0.txt
 *  GNU General Public License v3
 */
package eu.clarin.sru.server.fcs;

import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

/**
 * Helper class for serializing Advanced Data Views. It can be used for writing
 * more than once (call {@link #reset()} between data views), but it is not
 * thread-safe. This helper can also serialize HITS Data Views.
 */
public class AdvancedDataViewWriter {
    /**
     * The unit in which span offsets are expressed. It is serialized as the
     * <code>unit</code> attribute of the <code>Advanced</code> element.
     */
    public enum Unit {
        /** Serialized as <code>unit="item"</code>. */
        ITEM,
        /** Serialized as <code>unit="timestamp"</code>. */
        TIMESTAMP
    }

    private static final long INITIAL_SEGMENT_ID = 1;

    /** Marker value for spans that do not belong to any highlight group. */
    public static final int NO_HIGHLIGHT = -1;

    private static final String ADV_PREFIX = "adv";
    private static final String ADV_NS =
            "http://clarin.eu/fcs/dataview/advanced";
    private static final String ADV_MIME_TYPE =
            "application/x-clarin-fcs-adv+xml";
    private static final String HITS_MIME_TYPE =
            "application/x-clarin-fcs-hits+xml";
    private static final String FCS_HITS_PREFIX = "hits";
    private static final String FCS_HITS_NS =
            "http://clarin.eu/fcs/dataview/hits";

    private final Unit unit;
    /* all segments in creation order; shared between layers */
    private final List<Segment> segments = new ArrayList<>();
    /* spans grouped by the id of the layer they belong to */
    private final Map<URI, List<Span>> layers = new HashMap<>();
    private long nextSegmentId = INITIAL_SEGMENT_ID;


    /**
     * Constructor.
     *
     * @param unit
     *            the unit to be used for span offsets
     * @throws NullPointerException
     *             if <code>unit</code> is <code>null</code>
     * @see Unit
     */
    public AdvancedDataViewWriter(Unit unit) {
        if (unit == null) {
            throw new NullPointerException("unit == null");
        }
        this.unit = unit;
    }


    /**
     * Reset the writer for writing a new data view (instance). All segments
     * and layers collected so far are discarded and segment id generation
     * starts over.
     */
    public void reset() {
        nextSegmentId = INITIAL_SEGMENT_ID;
        /*
         * Drop the collected state as well; otherwise spans of the previous
         * data view would be written again and freshly created segments
         * would reuse ids that are still present in the segment list.
         */
        segments.clear();
        layers.clear();
    }


    /**
     * Add a span without alternate value and without highlight.
     *
     * @param layerId
     *            the span's layer id
     * @param start
     *            the span's start offset
     * @param end
     *            the span's end offset
     * @param value
     *            the span's content value or <code>null</code> if none
     * @throws IllegalArgumentException
     *             if any argument is invalid
     */
    public void addSpan(URI layerId, long start, long end, String value) {
        addSpan(layerId, start, end, value, null, NO_HIGHLIGHT);
    }


    /**
     * Add a span without alternate value.
     *
     * @param layerId
     *            the span's layer id
     * @param start
     *            the span's start offset
     * @param end
     *            the span's end offset
     * @param value
     *            the span's content value or <code>null</code> if none
     * @param highlight
     *            the highlight group
     * @throws IllegalArgumentException
     *             if any argument is invalid
     */
    public void addSpan(URI layerId, long start, long end, String value,
            int highlight) {
        addSpan(layerId, start, end, value, null, highlight);
    }


    /**
     * Add a span without highlight.
     *
     * @param layerId
     *            the span's layer id
     * @param start
     *            the span's start offset
     * @param end
     *            the span's end offset
     * @param value
     *            the span's content value or <code>null</code> if none
     * @param altValue
     *            the span's alternate value or <code>null</code> if none
     * @throws IllegalArgumentException
     *             if any argument is invalid
     */
    public void addSpan(URI layerId, long start, long end, String value,
            String altValue) {
        addSpan(layerId, start, end, value, altValue, NO_HIGHLIGHT);
    }


    /**
     * Add a span.
     *
     * @param layerId
     *            the span's layer id
     * @param start
     *            the span's start offset
     * @param end
     *            the span's end offset
     * @param value
     *            the span's content value or <code>null</code> if none
     * @param altValue
     *            the span's alternate value or <code>null</code> if none
     * @param highlight
     *            the highlight group; any value less than or equal to zero
     *            is treated as {@link #NO_HIGHLIGHT}
     * @throws NullPointerException
     *             if <code>layerId</code> is <code>null</code>
     * @throws IllegalArgumentException
     *             if any argument is invalid or the layer already contains
     *             a span for the same segment
     */
    public void addSpan(URI layerId, long start, long end, String value,
            String altValue, int highlight) {
        if (layerId == null) {
            throw new NullPointerException("layerId == null");
        }
        if (start < 0) {
            throw new IllegalArgumentException("start < 0");
        }
        if (end < start) {
            throw new IllegalArgumentException("end < start");
        }
        if (highlight <= 0) {
            highlight = NO_HIGHLIGHT;
        }

        // find segment or create a new one
        Segment segment = null;
        for (Segment seg : segments) {
            if ((seg.start == start) && (seg.end == end)) {
                segment = seg;
                break;
            }
        }
        if (segment == null) {
            segment = new Segment(nextSegmentId++, start, end);
            segments.add(segment);
        }

        // find layer or create a new one
        List<Span> layer = layers.get(layerId);
        if (layer == null) {
            layer = new ArrayList<>();
            layers.put(layerId, layer);
        }

        /*
         * sanity check (better overlap check?); segments are unique per
         * (start, end) pair, so an identity comparison is sufficient
         */
        for (Span span : layer) {
            if (span.segment == segment) {
                // FIXME: better exception!
                throw new IllegalArgumentException(
                        "segment already exists in layer");
            }
        }
        layer.add(new Span(segment, value, altValue, highlight));
    }


    /**
     * Write the Advanced Data View to the output stream.
     *
     * @param writer
     *            the writer to write to
     * @throws NullPointerException
     *             if <code>writer</code> is <code>null</code>
     * @throws XMLStreamException
     *             if an error occurred
     */
    public void writeAdvancedDataView(XMLStreamWriter writer)
            throws XMLStreamException {
        if (writer == null) {
            throw new NullPointerException("writer == null");
        }

        XMLStreamWriterHelper.writeStartDataView(writer, ADV_MIME_TYPE);
        writer.setPrefix(ADV_PREFIX, ADV_NS);
        writer.writeStartElement(ADV_NS, "Advanced");
        writer.writeNamespace(ADV_PREFIX, ADV_NS);
        if (unit == Unit.ITEM) {
            writer.writeAttribute("unit", "item");
        } else if (unit == Unit.TIMESTAMP) {
            writer.writeAttribute("unit", "timestamp");
        }

        // segments
        writer.writeStartElement(ADV_NS, "Segments");
        for (Segment segment : segments) {
            // FIXME: unit translation (long -> time)
            writer.writeEmptyElement(ADV_NS, "Segment");
            writer.writeAttribute("id", segment.id);
            writer.writeAttribute("start", Long.toString(segment.start));
            writer.writeAttribute("end", Long.toString(segment.end));
            if (segment.ref != null) {
                writer.writeAttribute("ref", segment.ref.toString());
            }
        }
        writer.writeEndElement(); // "Segments" element

        // layers
        writer.writeStartElement(ADV_NS, "Layers");
        for (Map.Entry<URI, List<Span>> layer : layers.entrySet()) {
            writer.writeStartElement(ADV_NS, "Layer");
            writer.writeAttribute("id", layer.getKey().toString());
            for (Span span : layer.getValue()) {
                final boolean hasContent =
                        (span.value != null) && !span.value.isEmpty();
                if (hasContent) {
                    writer.writeStartElement(ADV_NS, "Span");
                    writeSpanAttributes(writer, span);
                    writer.writeCharacters(span.value);
                    writer.writeEndElement(); // "Span" element
                } else {
                    // no content: emit a self-closing element
                    writer.writeEmptyElement(ADV_NS, "Span");
                    writeSpanAttributes(writer, span);
                }
            }
            writer.writeEndElement(); // "Layer" element
        }
        writer.writeEndElement(); // "Layers" element

        writer.writeEndElement(); // "Advanced" element
        XMLStreamWriterHelper.writeEndDataView(writer);
    }


    /**
     * Convenience method to write HITS Data View. Spans without content
     * value (<code>null</code> or empty) are skipped; highlighted spans are
     * wrapped in a <code>Hit</code> element.
     *
     * @param writer
     *            the writer to write to
     * @param layerId
     *            the layer id of the layer to be serialized as HITS Data View
     * @throws NullPointerException
     *             if <code>writer</code> or <code>layerId</code> is
     *             <code>null</code>
     * @throws XMLStreamException
     *             if an error occurred
     * @throws IllegalArgumentException
     *             if an invalid layer id was provided
     */
    public void writeHitsDataView(XMLStreamWriter writer, URI layerId)
            throws XMLStreamException {
        if (writer == null) {
            throw new NullPointerException("writer == null");
        }
        if (layerId == null) {
            throw new NullPointerException("layerId == null");
        }

        final List<Span> spans = layers.get(layerId);
        if (spans == null) {
            throw new IllegalArgumentException(
                    "layer with id'" + layerId + "' does not exist");
        }

        XMLStreamWriterHelper.writeStartDataView(writer, HITS_MIME_TYPE);
        writer.setPrefix(FCS_HITS_PREFIX, FCS_HITS_NS);
        writer.writeStartElement(FCS_HITS_NS, "Result");
        writer.writeNamespace(FCS_HITS_PREFIX, FCS_HITS_NS);
        // true, if a separating blank must precede the next piece of text
        boolean needSpace = false;
        for (Span span : spans) {
            /*
             * the content value is optional (see addSpan), so guard against
             * null before looking at the text
             */
            if ((span.value == null) || span.value.isEmpty()) {
                continue;
            }
            if (needSpace) {
                writer.writeCharacters(" ");
                needSpace = false;
            }
            if (span.highlight != null) {
                writer.writeStartElement(FCS_HITS_NS, "Hit");
                writer.writeCharacters(span.value);
                writer.writeEndElement(); // "Hit" element
                needSpace = true;
            } else {
                writer.writeCharacters(span.value);
                // plain text only needs a separator if it does not
                // already end with whitespace
                final char last = span.value.charAt(span.value.length() - 1);
                if (!Character.isWhitespace(last)) {
                    needSpace = true;
                }
            }
        }
        writer.writeEndElement(); // "Result" element
        XMLStreamWriterHelper.writeEndDataView(writer);
    }


    /*
     * Write the attributes shared by empty and non-empty "Span" elements.
     * Must be called directly after the element has been started.
     */
    private static void writeSpanAttributes(XMLStreamWriter writer, Span span)
            throws XMLStreamException {
        writer.writeAttribute("ref", span.segment.id);
        if (span.highlight != null) {
            writer.writeAttribute("highlight", span.highlight);
        }
        if (span.altValue != null) {
            writer.writeAttribute("alt-value", span.altValue);
        }
    }


    /*
     * An offset range that spans refer to. The id is the letter "s"
     * followed by the hexadecimal representation of the numeric id.
     */
    private static final class Segment {
        private final String id;
        private final long start;
        private final long end;
        private final URI ref;


        private Segment(long id, long start, long end) {
            this.id = "s" + Long.toHexString(id);
            this.start = start;
            this.end = end;
            /*
             * FIXME: add API to set reference
             */
            this.ref = null;
        }
    }


    /*
     * An annotation on a segment within one layer. The highlight is either
     * null (no highlight) or the letter "h" followed by the hexadecimal
     * representation of the highlight group.
     */
    private static final class Span {
        private final Segment segment;
        private final String value;
        private final String altValue;
        private final String highlight;


        private Span(Segment segment, String value, String altValue,
                int highlight) {
            this.segment = segment;
            this.value = value;
            this.altValue = altValue;
            if (highlight != NO_HIGHLIGHT) {
                this.highlight = "h" + Integer.toHexString(highlight);
            } else {
                this.highlight = null;
            }
        }
    }

} // class AdvancedDataViewWriter