Ignore:
Timestamp:
02/26/15 16:17:04 (9 years ago)
Author:
emanuel.dima@uni-tuebingen.de
Message:
  1. alpha 25: removed unused iso-639-2 support and unused german tokenizer; fixed bug related to xml unencoding; changed weblicht config
File:
1 edited

Legend:

Unmodified
Added
Removed
  • SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/Aggregator.java

    r6057 r6065  
    18 18 import eu.clarin.sru.fcs.aggregator.rest.RestService;
    19 19 import eu.clarin.sru.fcs.aggregator.scan.Statistics;
    20    import eu.clarin.sru.fcs.aggregator.lang.LanguagesISO693_3;
       20 import eu.clarin.sru.fcs.aggregator.util.LanguagesISO693;
    21 21 import io.dropwizard.Application;
    22 22 import io.dropwizard.assets.AssetsBundle;
     
    25 25 import java.io.File;
    26 26 import java.io.IOException;
    27    import java.io.InputStream;
    28 27 import java.util.ArrayList;
    29 28 import java.util.Collections;
     
    35 34 import java.util.concurrent.ScheduledExecutorService;
    36 35 import java.util.concurrent.atomic.AtomicReference;
    37    import opennlp.tools.tokenize.TokenizerModel;
    38 36 import org.slf4j.LoggerFactory;
    39 37
     
    88 86  * @author edima
    89 87  *
       88  * TODO: ?use weblicht only to show up in zoomed mode
       89  * - send only tcf with only a text layer and language (from the list in params)
       90  *
       91  * TODO: add the modes described above (except live)
       92  *
    90 93  * TODO: zoom into the results from a corpus, allow functionality only for
    91 94  * the view (search for next set of results)
     
    95 98   * Twan (they did a test, it worked)
    96 99   *
       100  * TODO: add PiWik support, tracking the following:
       101  * - visits, searches, search per corpus
       102  *
       103  * TODO: BUG: language detection is immediate, in UI; export implications
       104  *
    97 105  * TODO: websockets
    98 106  *
     
    122 130         private AtomicReference<Statistics> searchStatsAtom = new AtomicReference<Statistics>(new Statistics());
    123 131
    124             private TokenizerModel tokenizerModel;
    125 132         private LanguageDetector languageDetector;
    126 133         private TextObjectFactory textObjectFactory;
     
    235 242                 }
    236 243
    237                     LanguagesISO693_3.getInstance(); // force init
    238                     initTokenizer();
        244                 LanguagesISO693.getInstance(); // force init
    239 245                 initLanguageDetector();
    240 246
     
    293 299         }
    294 300
    295             public TokenizerModel getTokenizerModel() {
    296                     return tokenizerModel;
    297             }
    298
    299 301         private static void shutdownAndAwaitTermination(AggregatorConfiguration.Params params,
    300 302                         ThrottledClient sruClient, ExecutorService scheduler) {
     
    313 315         }
    314 316
    315             private void initTokenizer() {
    316                     TokenizerModel model = null;
    317                     try {
    318                             try (InputStream tokenizerModelDeAsIS = Thread.currentThread().getContextClassLoader().getResourceAsStream(DE_TOK_MODEL)) {
    319                                     model = new TokenizerModel(tokenizerModelDeAsIS);
    320                             }
    321                     } catch (IOException ex) {
    322                             log.error("Failed to load tokenizer model", ex);
    323                     }
    324                     tokenizerModel = model;
    325             }
    326
    327 317         public void initLanguageDetector() throws IOException {
    328 318                 List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAll();
Note: See TracChangeset for help on using the changeset viewer.