Changeset 5035

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/Aggregator.java

-                      r4106
+                      r5035
 import org.zkoss.zul.Textbox;
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
+import eu.clarin.sru.fcs.aggregator.util.SRUCQL;
 import org.zkoss.zul.A;
-import org.zkoss.zul.Button;
 import org.zkoss.zul.Div;
 import org.zkoss.zul.Menubar;
 …
 /**
  * Main window of the Aggregator application.
+ * Main component of the Aggregator application.
+ *
  * @author Yana Panchenko
 …
     private PagesVisibility pagesVisibility;
+    private static final String WEBLICHT_URL = "https://weblicht.sfs.uni-tuebingen.de/WebLicht-4/?input=";
+    private static final String WEBLICHT_URL =
+            "https://weblicht.sfs.uni-tuebingen.de/WebLicht-4/?input=";
 …
         String[] paramValue;
         String query = null;
         paramValue = Executions.getCurrent().getParameterMap().get("query");
+        paramValue = Executions.getCurrent().getParameterMap().get(SRUCQL.SEARCH_QUERY_PARAMETER);
         if (paramValue != null) {
             query = paramValue[0].trim();
 …
+        }
         LOGGER.log(Level.INFO, "Received parameter: query[{0}], ", query);
         paramValue = Executions.getCurrent().getParameterMap().get("operation");
+        paramValue = Executions.getCurrent().getParameterMap().get(SRUCQL.OPERATION);
         String operationString = null;
         if (paramValue != null) {
             operationString = paramValue[0].trim();
             if (!operationString.equals("searchRetrieve")) {
+            if (!operationString.equals(SRUCQL.SEARCH_RETRIEVE)) {
                 Messagebox.show("Not supported operation " + operationString, "FCS", 0, Messagebox.INFORMATION);
+            }

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/ControlsVisibility.java

r3038	r5035
2	2
3	3	import org.zkoss.zul.A;
4		~~import org.zkoss.zul.Button;~~
5		~~import org.zkoss.zul.Label;~~
6	4	import org.zkoss.zul.Menubar;
7	5	import org.zkoss.zul.North;

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/SearchOptions.java

-                      r5034
+                      r5035
 import eu.clarin.sru.fcs.aggregator.sopt.CorpusRendererLive;
 import eu.clarin.sru.fcs.aggregator.sopt.Languages;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCacheI;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
+import eu.clarin.sru.fcs.aggregator.util.SRUCQL;
 import java.lang.reflect.Type;
 import java.util.ArrayList;
 …
     private boolean liveMode = false;
+    private SRUVersion version = SRUVersion.VERSION_1_2;
+    private ScanCacheI cache;
+    private ScanCache cache;
     @Override
     public void doAfterCompose(Component comp) throws Exception {
         super.doAfterCompose(comp);
-        setUpSRUVersion();
         setUpAggerationContext();
         cache = (ScanCacheI) Executions.getCurrent().getDesktop().getWebApp().getAttribute(WebAppListener.CORPUS_CACHE);
+        cache = (ScanCache) Executions.getCurrent().getDesktop().getWebApp().getAttribute(WebAppListener.CORPUS_CACHE);
         //if (cache.isEmpty()) {
         //    liveMode = true;
 …
             DefaultTreeNode<Corpus> node = (DefaultTreeNode<Corpus>) treeitem.getValue();
             Corpus corpus = node.getData();
             if (corpus.getLanguages().contains(selectedLang) || selectedLang.equals("anylang")) {
+            if (corpus.getLanguages().contains(selectedLang) || selectedLang.equals(Languages.ANY_LANGUAGE_NAME)) {
                 treeitem.setVisible(true);
             } else {
 …
     private void setUpAggerationContext() {
         String[] paramValue = Executions.getCurrent().getParameterMap().get("x-aggregation-context");
+        String[] paramValue = Executions.getCurrent().getParameterMap().get(SRUCQL.AGGREGATION_CONTEXT);
         String contextJson = null;
         if (paramValue != null) {
             contextJson = paramValue[0].trim();
+        }
         LOGGER.log(Level.INFO, "Received parameter: x-aggregation-context[{0}], ", contextJson);
+        LOGGER.log(Level.INFO, "Received parameter {0}:[{1}], ", new String[]{SRUCQL.AGGREGATION_CONTEXT, contextJson});
         if (contextJson != null) {
 …
             } catch (Exception ex) {
                 LOGGER.log(Level.SEVERE, "Error parsing JSON from x-aggregation-context: {0} {1}", new String[]{ex.getMessage(), contextJson});
                 Messagebox.show("Error in x-aggregation-context parameter", "FCS", 0, Messagebox.INFORMATION);
+                Messagebox.show("Error in " + SRUCQL.AGGREGATION_CONTEXT, "FCS", 0, Messagebox.INFORMATION);
+            }
+        }
 …
+    }
-    private void setUpSRUVersion() {
-        String[] paramValue = Executions.getCurrent().getParameterMap().get("version");
-        String versionString = null;
-        if (paramValue != null) {
-            versionString = paramValue[0].trim();
-            if (versionString.equals("1.2")) {
-                version = SRUVersion.VERSION_1_2;
-            } else if (versionString.equals("1.1")) {
-                version = SRUVersion.VERSION_1_1;
-            } else {
-                Messagebox.show("SRU Version " + version + " not supported", "FCS", 0, Messagebox.INFORMATION);
+            }
+        }
-        LOGGER.log(Level.INFO, "Received parameter: version[{0}], ", versionString);
+    }
     private void selectCorpora(Treeitem openItem, Corpus data, List<String> handles) {

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/SearchResults.java

-                      r4106
+                      r5035
 import eu.clarin.sru.fcs.aggregator.sresult.SearchResult;
 import eu.clarin.sru.fcs.aggregator.sresult.SearchResultRecordRenderer;
 import eu.clarin.sru.fcs.aggregator.util.SRUCQLsearchRetrieve;
+import eu.clarin.sru.fcs.aggregator.util.SRUCQL;
 import eu.clarin.weblicht.wlfxb.io.WLDObjector;
 import eu.clarin.weblicht.wlfxb.io.WLFormatException;
 …
 import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicBoolean;
-import javax.ws.rs.core.MediaType;
 import opennlp.tools.tokenize.TokenizerME;
 import opennlp.tools.tokenize.TokenizerModel;
 …
         Executions.getCurrent().getDesktop().enableServerPush(true);
         searchClient = (SRUThreadedClient) Executions.getCurrent().getDesktop().getWebApp().getAttribute(WebAppListener.SHARED_SRU_CLIENT);
-        setUpSRUVersion();
         // assign the search controller to desktop, so that it can be accessed to be shutdown when the desktop is destroyed
         Executions.getCurrent().getDesktop().setAttribute(this.getClass().getSimpleName(), this);
 …
         searchRequest.setStartRecord(searchOffset[0] + searchOffset[1]);
         if (resultsItem.hasCorpusHandler()) {
             searchRequest.setExtraRequestData(SRUCQLsearchRetrieve.CORPUS_HANDLE_PARAMETER, resultsItem.getCorpus().getHandle());
+            searchRequest.setExtraRequestData(SRUCQL.SEARCH_CORPUS_HANDLE_PARAMETER, resultsItem.getCorpus().getHandle());
+        }
         try {
 …
     private void setUpSRUVersion() {
         String[] paramValue = Executions.getCurrent().getParameterMap().get("version");
+        String[] paramValue = Executions.getCurrent().getParameterMap().get(SRUCQL.VERSION);
         String versionString = null;
         if (paramValue != null) {
 …
+            }
+        }
         LOGGER.log(Level.INFO, "Received parameter: version[{0}], ", versionString);
+        LOGGER.log(Level.INFO, "Received parameter: {0}[{1}], ", new String[]{SRUCQL.VERSION,versionString});
+    }

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/WebAppListener.java

-                      r5034
+                      r5035
 package eu.clarin.sru.fcs.aggregator.app;
 import eu.clarin.sru.fcs.aggregator.cache.ScanCrawlerRunnable;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCrawlTask;
 import eu.clarin.sru.fcs.aggregator.cache.ScanCrawler;
 import eu.clarin.sru.fcs.aggregator.cache.ScanCacheFiled;
 import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
+import eu.clarin.sru.fcs.aggregator.cache.SimpleInMemScanCache;
 import eu.clarin.sru.client.SRUThreadedClient;
 import eu.clarin.sru.client.fcs.ClarinFCSRecordParser;
+import eu.clarin.sru.fcs.aggregator.cache.EndpointUrlFilter;
 import eu.clarin.sru.fcs.aggregator.sopt.CenterRegistryI;
 import eu.clarin.sru.fcs.aggregator.sopt.CenterRegistryLive;
 import eu.clarin.sru.fcs.aggregator.sopt.Languages;
 import eu.clarin.sru.fcs.aggregator.cache.ScanCacheI;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
 import java.io.File;
 import java.io.IOException;
 …
     private String getScanDirectory() {
         //File aggregatorDir = new File(System.getProperty("user.home"), "/." + AGGREGATOR_DIR_NAME);
         File aggregatorDir = new File("/var/www", "/." + AGGREGATOR_DIR_NAME);
+        File aggregatorDir = new File(System.getProperty("user.home"), "/." + AGGREGATOR_DIR_NAME);
+        //File aggregatorDir = new File("/var/www", "/." + AGGREGATOR_DIR_NAME);
         if (!aggregatorDir.exists()) {
 …
         CenterRegistryI centerRegistry = new CenterRegistryLive();
         SRUThreadedClient sruScanClient = (SRUThreadedClient) webapp.getAttribute(WebAppListener.SHARED_SRU_CLIENT);
         //EndpointUrlFilter filter = new EndpointUrlFilter();
+        EndpointUrlFilter filter = new EndpointUrlFilter();
         //filter.urlShouldContainAnyOf("leipzig", ".mpi.nl");
         //filter.urlShouldContainAnyOf("uni-tuebingen.de", ".mpi.nl");
         //filter.urlShouldContainAnyOf("dspin.dwds.de", "lindat.");
         //ScanCrawler scanCrawler = new ScanCrawler(centerRegistry, sruScanClient, filter, maxDepth);
         ScanCrawler scanCrawler = new ScanCrawler(centerRegistry, sruScanClient, null, CACHE_MAX_DEPTH);
         ScanCacheI scanCache;
+        filter.urlShouldContainAnyOf("dspin.dwds.de", "lindat.");
+        ScanCrawler scanCrawler = new ScanCrawler(centerRegistry, sruScanClient, filter, CACHE_MAX_DEPTH);
+        //ScanCrawler scanCrawler = new ScanCrawler(centerRegistry, sruScanClient, null, CACHE_MAX_DEPTH);
+        ScanCache scanCache;
         //synchronized (scanCrawler) {
 …
             } catch (Exception e) {
                 LOGGER.log(Level.SEVERE, "Error while reading the scan cache!", e);
                 scanCache = new ScanCache();
+                scanCache = new SimpleInMemScanCache();
+            }
         //}
 …
         scheduler.scheduleAtFixedRate(
                 new ScanCrawlerRunnable(scanCrawler, scanCacheFiled, webapp),
+                new ScanCrawlTask(scanCrawler, scanCacheFiled, webapp),
 , CACHE_UPDATE_INTERVAL, CACHE_UPDATE_INTERVAL_UNIT);

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/EndpointFilter.java

-                      r5034
+                      r5035
 /**
+ *
+ * Filter for the cache of scan data (endpoint/resources descriptions), used
+ * to specify which endpoints should be cached and which should be skipped.
+ * Useful for testing the endpoints.
+ *
  * @author yanapanchenko
  */

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/EndpointUrlFilter.java

-                      r5034
+                      r5035
 /**
+ *
+ * Filter for the cache of scan data (endpoint/resources descriptions) based
+ * on the endpoint URL. Only endpoints whose URL contains at least one of the
+ * specified strings will be cached. Useful for testing the endpoints.
+ *
  * @author yanapanchenko
  */

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCache.java

-                      r5034
+                      r5035
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
+import eu.clarin.sru.fcs.aggregator.sopt.InstitutionI;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCacheI;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import eu.clarin.sru.fcs.aggregator.sopt.Institution;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.logging.Logger;
 /**
+ * Interface for the cached scan data (endpoints descriptions).
+ *
  * @author yanapanchenko
+ * @author Yana Panchenko
  */
 public class ScanCache implements ScanCacheI {
+public interface ScanCache {
+    private Map<String, List<Corpus>> enpUrlToRootCorpora = new LinkedHashMap<String, List<Corpus>>(30);
+    private Map<String, List<Corpus>> corpusToChildren = new HashMap<String, List<Corpus>>();
+    private Map<String, String> childToParent = new HashMap<String, String>();
+    private Map<String, Set<Corpus>> langToRootCorpora = new HashMap<String, Set<Corpus>>();
+    private Map<String, Set<Corpus>> langToTopUniqueCorpora = new HashMap<String, Set<Corpus>>();
+    private List<InstitutionI> institutions = new ArrayList<InstitutionI>();
+    /**
+     * Checks whether the Cache has the endpoints resource tree cached.
+     * @return true if the Cache is empty, false otherwise
+     */
+    public boolean isEmpty();
+    /**
+     * Gets all the institutions registered in center repository
+     * as having a SRU/CQL endpoint
+     * @return list of institutions that support SRU/CQL
+     */
+    public List<Institution> getInstitutions();
+    private static final Logger LOGGER = Logger.getLogger(ScanCache.class.getName());
+    /**
+     * Gets all the root corpora of the endpoints (top nodes in the corpus
+     * resource tree)
+     * @return root corpora of the endpoints
+     */
+    public List<Corpus> getRootCorpora();
+     /**
+     * Gets languages mapped to all the root corpora of the endpoints
+     * (top nodes in the corpus resource tree) that have the corresponding language
+     * @return a map from the languages to the root corpora of the endpoints
+     * in the corresponding language
+     */
+    public Map<String, Set<Corpus>> getRootCorporaForLang();
+    /**
+     * Gets all the root corpora of the endpoints (top nodes in the corpus
+     * resource tree) that have the specified language
+     * @param lang language of interest as a three-letter iso code
+     * @return root corpora of the endpoints that are in the specified language
+     */
+    public List<Corpus> getRootCorporaForLang(String lang);
+    /**
+     * In the corpus resource tree, gets all the languages mapped to the corpus
+     * resources that have the corresponding language as their only language
+     * and that either have no parent resource or have a multi-language parent.
+     * @return map of languages to top corpora (in the corpus resource tree)
+     * that have the corresponding language as the only language
+     */
+    public Map<String, Set<Corpus>> getTopUniqueLangToCorpora();
+    /**
+     * In the corpus resource tree, gets all the resource nodes of the tree
+     * that have the specified language as the only language of the resource,
+     * and either have no parent resource or its parent resource has multiple
+     * languages.
+     * @param lang language of interest as three-letter iso code
+     * @return corpora that have the specified language as the only language
+     * and whose parent corpora do not have it as the only language
+     */
+    public List<Corpus> getTopUniqueLanguageCorpora(String lang);
+    /**
+     * Gets all the root corpora of the specified endpoint
+     * @param enpointUrl the URL of the endpoint of interest
+     * @return root corpora of the endpoint
+     */
+    public List<Corpus> getRootCorporaOfEndpoint(String enpointUrl);
+    /**
+     * Gets all the languages of the existing corpora in the corpus resource
+     * tree (since parent corpora sum-up languages of their children corpora,
+     * that's all the languages of the root corpora)
+     * @return all the languages specified in the corpus resource tree
+     */
+    public Set<String> getLanguages();
+    public List<InstitutionI> getInstitutions() {
+        return institutions;
+    }
+    /**
+     * Gets all children corpora of the specified corpus in the endpoints
+     * resource tree
+     * @param corpus the parent corpus
+     * @return children corpora of the specified parent corpus
+     */
+    public List<Corpus> getChildren(Corpus corpus);
+    /**
+     * Adds institution that supports SRU/CQL into the cache.
+     *
+     * @param institution that supports SRU/CQL
+     */
+    public void addInstitution(Institution institution);
+    /**
+     * Adds corpus that is the top level resource of an endpoint
+     * (root corpus that has no parent corpus) into the cache.
+     * Same as addCorpus(c, null)
+     *
+     * @param c root corpus to be added
+     */
+    public void addCorpus(Corpus c);
     @Override
     public List<Corpus> getRootCorporaOfEndpoint(String enpointUrl) {
         List<Corpus> roots = new ArrayList<Corpus>();
         if (enpUrlToRootCorpora.containsKey(enpointUrl)) {
             roots.addAll(enpUrlToRootCorpora.get(enpointUrl));
+        }
         return roots;
+    }
+    /**
+     * Adds corpus into the cache.
+     *
+     * @param c corpus to be added
+     * @param parentCorpus parent of the corpus to be added
+     */
+    public void addCorpus(Corpus c, Corpus parentCorpus);
+    public List<Corpus> getChildrenCorpora(String handle) {
+        List<Corpus> children = new ArrayList<Corpus>();
+        if (corpusToChildren.containsKey(handle)) {
+            children.addAll(corpusToChildren.get(handle));
+        }
+        return children;
+    }
+    public void addInstitution(InstitutionI institution) {
+        institutions.add(institution);
+    }
+    public void addCorpus(Corpus c) {
+        addCorpus(c, true, null);
+    }
+    public void addCorpus(Corpus c, Corpus parentCorpus) {
+        addCorpus(c, false, parentCorpus);
+    }
+    public void addCorpus(Corpus c, boolean root, Corpus parentCorpus) {
+        // index top corpora with unique language as for their languages
+        //if (c.getLanguages().size() == 1 &&
+        //        (root || this.))
+        // don't add corpus that introduces cyclic references
+        if (this.childToParent.containsKey(c.getHandle())) {
+            // as of March 2014, there are 2 such endpoints...
+            LOGGER.warning("Cyclic reference in corpus " + c.getHandle() + " of endpoint " + c.getEndpointUrl());
+            return;
+        }
+        if (root) {
+            // index root corpora as for their languages
+            for (String lang : c.getLanguages()) {
+                if (!langToRootCorpora.containsKey(lang)) {
+                    langToRootCorpora.put(lang, new HashSet<Corpus>());
+                }
+                langToRootCorpora.get(lang).add(c);
+            }
+            // index root corpora by their endpoint url
+            if (!enpUrlToRootCorpora.containsKey(c.getEndpointUrl())) {
+                enpUrlToRootCorpora.put(c.getEndpointUrl(), new ArrayList<Corpus>());
+            }
+            enpUrlToRootCorpora.get(c.getEndpointUrl()).add(c);
+            childToParent.put(c.getHandle(), Corpus.ROOT_HANDLE);
+        } else {
+            if (!corpusToChildren.containsKey(parentCorpus.getHandle())) {
+                corpusToChildren.put(parentCorpus.getHandle(), new ArrayList<Corpus>());
+            }
+            corpusToChildren.get(parentCorpus.getHandle()).add(c);
+            childToParent.put(c.getHandle(), parentCorpus.getHandle());
+        }
+    }
+    @Override
+    public String toString() {
+        return "cache{\n" + "institutions=" + institutions + "\n"
+                + "enpUrlToRootCorpora=" + enpUrlToRootCorpora
+                + "\n corpusToChildren=" + corpusToChildren
+                + "\n langToTopUniqueCorpora=" + langToTopUniqueCorpora + "\n}";
+    }
+    @Override
+    public boolean isEmpty() {
+        return enpUrlToRootCorpora.isEmpty();
+    }
+    @Override
+    public List<Corpus> getRootCorpora() {
+        List<Corpus> rootCorpora = new ArrayList<Corpus>(enpUrlToRootCorpora.size());
+        for (List<Corpus> corpora : this.enpUrlToRootCorpora.values()) {
+            rootCorpora.addAll(corpora);
+        }
+        return rootCorpora;
+    }
+    @Override
+    public Set<String> getLanguages() {
+        Set<String> languages = new HashSet<String>(this.langToRootCorpora.size());
+        languages.addAll(this.langToRootCorpora.keySet());
+        return languages;
+    }
+    @Override
+    public List<Corpus> getChildren(Corpus corpus) {
+        List<Corpus> corpora = this.corpusToChildren.get(corpus.getHandle());
+        if (corpora == null) {
+            return (new ArrayList<Corpus>());
+        } else {
+            List<Corpus> corporaCopy = new ArrayList<Corpus>(corpora);
+            return corporaCopy;
+        }
+    }
+    @Override
+    public Map<String, Set<Corpus>> getRootCorporaForLang() {
+        return langToRootCorpora;
+    }
+    @Override
+    public List<Corpus> getRootCorporaForLang(String lang) {
+        List<Corpus> rootCorpora = new ArrayList<Corpus>(enpUrlToRootCorpora.size());
+        for (List<Corpus> corpora : this.enpUrlToRootCorpora.values()) {
+            for (Corpus corpus : corpora) {
+                if (corpus.getLanguages().contains(lang)) {
+                    rootCorpora.add(corpus);
+                }
+            }
+        }
+        return rootCorpora;
+    }
+    @Override
+    public Map<String, Set<Corpus>> getTopUniqueLangToCorpora() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+    @Override
+    public List<Corpus> getTopUniqueLanguageCorpora(String lang) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+    /**
+     * Gets Corpus that corresponds to the specified handle.
+     *
+     * @param handle of the Corpus of interest.
+     * @return Corpus of the handle or null if there is no Corpus with
+     * this handle in the cache.
+     */
+    public Corpus getCorpus(String handle);
+}

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCacheFiled.java

-                      r5034
+                      r5035
 package eu.clarin.sru.fcs.aggregator.cache;
-import eu.clarin.sru.fcs.aggregator.app.CacheCorporaScanIntoFileTask;
-import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
 import eu.clarin.sru.fcs.aggregator.sopt.Endpoint;
 import eu.clarin.sru.fcs.aggregator.sopt.Institution;
-import eu.clarin.sru.fcs.aggregator.sopt.InstitutionI;
 import java.io.BufferedOutputStream;
 import java.io.BufferedReader;
 …
 /**
+ * Utility for reading/writing scan data (endpoints descriptions) from/to
+ * ScanCache from/to local files.
+ *
  * @author yanapanchenko
 …
     private static final Logger LOGGER = Logger.getLogger(ScanCacheFiled.class.getName());
+    /**
+     * Constructs ScanCache/files reading/writing utility.
+     *
+     * @param scanDirectory path to the local directory where files with
+     * ScanCache data are/should be stored.
+     */
     public ScanCacheFiled(String scanDirectory) {
         this.scanDirectory = scanDirectory;
+    }
+    /**
+     * Writes ScanCache data (endpoints and resources descriptions) in a special
+     * simple plain text format into the files.
+     *
+     * @param cache ScanCache the data of which should be written into the files.
+     */
     public void write(ScanCache cache) {
 …
             File sruInstitutionsFile = new File(scanDirectory, INSTITUTION_ENDPOINTS_FILENAME);
             os = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(sruInstitutionsFile)), ENCODING);
             for (InstitutionI institution : cache.getInstitutions()) {
+            for (Institution institution : cache.getInstitutions()) {
                 writeInstitutionInfo(os, institution);
                 for (Endpoint endp : institution.getEndpoints()) {
 …
             writeCorpusInfo(os, c);
             List<Corpus> children = cache.getChildrenCorpora(c.getHandle());
+            List<Corpus> children = cache.getChildren(c);
             if (children != null) {
                 for (Corpus child : children) {
 …
+    }
     private void writeInstitutionInfo(Writer writer, InstitutionI institution) throws IOException {
+    private void writeInstitutionInfo(Writer writer, Institution institution) throws IOException {
         writer.write(I);
 …
+    }
+    /**
+     * Reads ScanCache data from the files in scanDirectory directory. The files
+     * contain endpoint and resources descriptions in a special simple plain
+     * text format, resulting from applying write(ScanCache scanCache) method.
+     *
+     * @return ScanCache with the data read from the files containing endpoints
+     * and resources descriptions.
+     */
     public ScanCache read() {
         ScanCache cache = new ScanCache();
+        SimpleInMemScanCache cache = new SimpleInMemScanCache();
         File sruInstitutionsFile = new File(scanDirectory, INSTITUTION_ENDPOINTS_FILENAME);
         BufferedReader reader = null;
 …
                 line = line.trim();
                 if (line.length() > 0) {
                     String[] splitted = line.split("\\" + CacheCorporaScanIntoFileTask.SEP);
                     if (splitted.length == 2 && splitted[0].equals(CacheCorporaScanIntoFileTask.I)) {
+                    String[] splitted = line.split("\\" + SEP);
+                    if (splitted.length == 2 && splitted[0].equals(I)) {
                         inst = new Institution(splitted[1], "");
                         if (!institutions.contains(inst)) {
 …
                             cache.addInstitution(inst);
+                        }
                     } else if (inst != null && splitted.length == 3 && splitted[0].equals(CacheCorporaScanIntoFileTask.IE)) {
+                    } else if (inst != null && splitted.length == 3 && splitted[0].equals(IE)) {
                         Endpoint ep = inst.add(splitted[2]);
                         if (!splitted[1].trim().isEmpty()) {
 …
+    }
     private void readAndAddCorpus(String path, Corpus parentCorpus, InstitutionI inst, ScanCache cache) {
+    private void readAndAddCorpus(String path, Corpus parentCorpus, Institution inst, SimpleInMemScanCache cache) {
         File corpusFile = new File(path, CORPUS_INFO_FILENAME);
         BufferedReader reader = null;
 …
                         // corpus langs
                         Set<String> langs = new HashSet<String>();
                         for (String lang : line.split("\\" + CacheCorporaScanIntoFileTask.SEP)) {
+                        for (String lang : line.split("\\" + SEP)) {
                             langs.add(lang);
+                        }

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCrawlTask.java

-                      r5034
+                      r5035
 package eu.clarin.sru.fcs.aggregator.cache;
-import eu.clarin.sru.fcs.aggregator.cache.ScanCrawler;
-import eu.clarin.sru.fcs.aggregator.cache.ScanCacheFiled;
-import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
 import static eu.clarin.sru.fcs.aggregator.app.WebAppListener.CORPUS_CACHE;
 import java.util.logging.Level;
 …
 /**
+ *
+ * A task for crawling endpoint scan operation responses of the FCS
+ * specification. If successful, saves the found endpoint and resource
+ * descriptions into a new ScanCache, updates the web application context
+ * with this new cache, and overwrites the previously scanned data saved on disk.
+ *
  * @author yanapanchenko
  */
 public class ScanCrawlerRunnable implements Runnable {
+public class ScanCrawlTask implements Runnable {
     private static final Logger logger = Logger.getLogger(ScanCrawlerRunnable.class.getName());
+    private static final Logger logger = Logger.getLogger(ScanCrawlTask.class.getName());
     private final ScanCrawler scanCrawler;
 …
     private WebApp webapp;
     public ScanCrawlerRunnable(
+    public ScanCrawlTask(
             ScanCrawler scanCrawler, ScanCacheFiled scanCacheFiled, WebApp webapp) {
         this.scanCrawler = scanCrawler;
 …
         logger.info("STARTING CACHING CORPORA SCAN");
+        ScanCache cacheNew = scanCrawler.crawl();
+        SimpleInMemScanCache cacheNew = new SimpleInMemScanCache();
+        scanCrawler.crawl(cacheNew);
         logger.info("New Cache, number of root corpora: " + cacheNew.getRootCorpora().size());

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCrawler.java

-                      r5034
+                      r5035
 package eu.clarin.sru.fcs.aggregator.cache;
-import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
 import eu.clarin.sru.client.SRUScanRequest;
 import eu.clarin.sru.client.SRUScanResponse;
 …
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
 import eu.clarin.sru.fcs.aggregator.sopt.Endpoint;
 import eu.clarin.sru.fcs.aggregator.sopt.InstitutionI;
 import eu.clarin.sru.fcs.aggregator.util.SRUCQLscan;
+import eu.clarin.sru.fcs.aggregator.sopt.Institution;
+import eu.clarin.sru.fcs.aggregator.util.SRUCQL;
 import java.util.List;
 import java.util.concurrent.Future;
 …
 /**
+ *
+ * Crawler for collecting endpoint scan operation responses of FCS specification.
+ * Collects all the endpoints and resources descriptions.
+ *
  * @author yanapanchenko
  */
 …
+    }
+    public ScanCache crawl() {
+        ScanCache cache = new ScanCache();
+    /**
+     * Crawler of scan operation of FCS specification. Collects all the endpoints
+     * and resources descriptions into the provided cache.
+     *
+     * @param cache cache into which the endpoints and resources descriptions
+     * from scan operation responses should be collected.
+     */
+    public void crawl(ScanCache cache) {
         //TODO remember not responding root corpora and come back to them later... ?
         List<InstitutionI> institutions = cr.getCQLInstitutions();
+        List<Institution> institutions = cr.getCQLInstitutions();
         //LOGGER.info(institutions.toString());
         for (InstitutionI institution : institutions) {
+        for (Institution institution : institutions) {
             cache.addInstitution(institution);
             Iterable<Endpoint> endpoints = institution.getEndpoints();
 …
+        }
-        return cache;
+    }
+    // TODO: ask Oliver to add API support for the extra info in the
+    // SRU client/server libraries, so that it's not necessary to work
+    // with DocumentFragment
     private void addExtraInfo(Corpus c, SRUTerm term) {
 …
     private void addCorpora(SRUThreadedClient sruScanClient, String endpointUrl,
+            InstitutionI institution, int depth, Corpus parentCorpus, ScanCache cache) {
+        //System.out.println("Adding Corpora: " + endpointUrl + " " + handle);
+            Institution institution, int depth, Corpus parentCorpus, ScanCache cache) {
         Future<SRUScanResponse> corporaResponse = null;
 …
             SRUScanRequest corporaRequest = new SRUScanRequest(endpointUrl);
             StringBuilder scanClause = new StringBuilder(SRUCQLscan.RESOURCE_PARAMETER);
+            StringBuilder scanClause = new StringBuilder(SRUCQL.SCAN_RESOURCE_PARAMETER);
             scanClause.append("=");
-            //String normalizedHandle = normalizeHandle(handle, root);
             String normalizedHandle = normalizeHandle(parentCorpus, root);
             scanClause.append(normalizedHandle);
             corporaRequest.setScanClause(scanClause.toString());
+            corporaRequest.setExtraRequestData(SRUCQLscan.RESOURCE_INFO_PARAMETER, "true");
+            corporaRequest.setExtraRequestData(SRUCQL.SCAN_RESOURCE_INFO_PARAMETER,
+                    SRUCQL.SCAN_RESOURCE_INFO_PARAMETER_DEFAULT_VALUE);
             corporaResponse = sruScanClient.scan(corporaRequest);
             Thread.sleep(5000);
 …
             if (response != null && response.hasTerms()) {
                 for (SRUTerm term : response.getTerms()) {
+                    // don't add corpus that introduces cyclic references
+                    // as of March 2014, there are 2 such endpoints...
+                    if (cache.getCorpus(term.getValue())!= null) {
+                        LOGGER.warning("Cyclic reference in corpus " + term.getValue() + " of endpoint " + endpointUrl);
+                        continue;
+                    }
                     Corpus c = new Corpus(institution, endpointUrl);
                     c.setHandle(term.getValue());
 …
                     c.setNumberOfRecords(term.getNumberOfRecords());
                     addExtraInfo(c, term);
                     cache.addCorpus(c, root, parentCorpus);
+                    cache.addCorpus(c, parentCorpus);
                     addCorpora(sruScanClient, c.getEndpointUrl(), c.getInstitution(),
                             depth, c, cache);

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/SimpleInMemScanCache.java

-                      r5034
+                      r5035
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
+import eu.clarin.sru.fcs.aggregator.sopt.InstitutionI;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCacheI;
+import eu.clarin.sru.fcs.aggregator.sopt.Institution;
 import java.util.ArrayList;
 import java.util.HashMap;
 …
 /**
+ * Implementation of the cached scan data (endpoints descriptions) that
+ * stores the cache in memory in maps.
+ *
  * @author yanapanchenko
  */
 public class ScanCache implements ScanCacheI {
+public class SimpleInMemScanCache implements ScanCache {
     private Map<String, List<Corpus>> enpUrlToRootCorpora = new LinkedHashMap<String, List<Corpus>>(30);
     private Map<String, List<Corpus>> corpusToChildren = new HashMap<String, List<Corpus>>();
+    private Map<String, String> childToParent = new HashMap<String, String>();
+    //private Map<String, String> childToParent = new HashMap<String, String>();
+    private Map<String, Corpus> handleToCorpus = new HashMap<String, Corpus>();
     private Map<String, Set<Corpus>> langToRootCorpora = new HashMap<String, Set<Corpus>>();
     private Map<String, Set<Corpus>> langToTopUniqueCorpora = new HashMap<String, Set<Corpus>>();
     private List<InstitutionI> institutions = new ArrayList<InstitutionI>();
+    private List<Institution> institutions = new ArrayList<Institution>();
     private static final Logger LOGGER = Logger.getLogger(ScanCache.class.getName());
+    private static final Logger LOGGER = Logger.getLogger(SimpleInMemScanCache.class.getName());
     public List<InstitutionI> getInstitutions() {
+    @Override
+    public List<Institution> getInstitutions() {
         return institutions;
+    }
 …
+    }
+    public List<Corpus> getChildrenCorpora(String handle) {
+        List<Corpus> children = new ArrayList<Corpus>();
+        if (corpusToChildren.containsKey(handle)) {
+            children.addAll(corpusToChildren.get(handle));
+        }
+        return children;
+    }
+    public void addInstitution(InstitutionI institution) {
+    @Override
+    public void addInstitution(Institution institution) {
         institutions.add(institution);
+    }
+    @Override
     public void addCorpus(Corpus c) {
         addCorpus(c, true, null);
+        addCorpus(c, null);
+    }
+    @Override
     public void addCorpus(Corpus c, Corpus parentCorpus) {
-        addCorpus(c, false, parentCorpus);
+    }
-    public void addCorpus(Corpus c, boolean root, Corpus parentCorpus) {
         // index top corpora with unique language as for their languages
         //if (c.getLanguages().size() == 1 &&
         //        (root || this.))
-        // don't add corpus that introduces cyclic references
-        if (this.childToParent.containsKey(c.getHandle())) {
-            // as of March 2014, there are 2 such endpoints...
-            LOGGER.warning("Cyclic reference in corpus " + c.getHandle() + " of endpoint " + c.getEndpointUrl());
-            return;
+        }
+        if (root) {
+        handleToCorpus.put(c.getHandle(), c);
+        if (parentCorpus == null) { //i.e it's a root corpus
             // index root corpora as for their languages
             for (String lang : c.getLanguages()) {
 …
+            }
             enpUrlToRootCorpora.get(c.getEndpointUrl()).add(c);
             childToParent.put(c.getHandle(), Corpus.ROOT_HANDLE);
+            //childToParent.put(c.getHandle(), Corpus.ROOT_HANDLE);
         } else {
             if (!corpusToChildren.containsKey(parentCorpus.getHandle())) {
 …
+            }
             corpusToChildren.get(parentCorpus.getHandle()).add(c);
             childToParent.put(c.getHandle(), parentCorpus.getHandle());
+            //childToParent.put(c.getHandle(), parentCorpus.getHandle());
+        }
+    }
 …
         throw new UnsupportedOperationException("Not supported yet.");
+    }
+    @Override
+    public Corpus getCorpus(String handle) {
+        return this.handleToCorpus.get(handle);
+    }
+}

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/rest/AggregatorService.java

-                      r5034
+                      r5035
 /**
+ *
+ * RESTful service. At the moment it does nothing useful; it was added just to
+ * make sure that it is possible to combine REST services with the ZK app
+ * and to add to the aggregator support for its use as an aggregated FCS
+ * server, as was planned in the initial FCS specification.
+ *
  * @author yanapanchenko
  */

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CenterRegistryForTesting.java

-                      r3044
+                      r5035
 /**
+ * Center registry node. Its children are centers (institutions).
+ * Center registry node. Its children are centers (institutions).
+ * The class was created at the request of MPI, to give them a way
+ * to test their endpoints on development servers with the
+ * aggregator before they put them on the production server.
+ * Institutions and endpoint URLs that need to be tested are hard-coded.
+ *
  * @author Yana Panchenko
 …
     private static final Logger logger = Logger.getLogger(CenterRegistryForTesting.class.getName());
     private boolean hasChildrenLoaded = false;
     private List<InstitutionI> centers = new ArrayList<InstitutionI>();
+    private List<Institution> centers = new ArrayList<Institution>();
     private static final String[] INSTITUTION_URLS = new String[]{
         "http://130.183.206.32/restxml/5"
 …
     @Override
     public List<InstitutionI> getCQLInstitutions() {
+    public List<Institution> getCQLInstitutions() {
         loadCQLInstitutions();
         return centers;
 …
     @Override
     public InstitutionI getCQLInstitution(int index) {
+    public Institution getCQLInstitution(int index) {
         loadCQLInstitutions();
         if (index >= centers.size()) {
 …
     private void loadCQLInstitutionsForTesting() {
         for (int i = 0; i < INSTITUTION_ENDPOINTS.length; i++) {
             InstitutionI institution = new Institution(INSTITUTION_NAMES[i], INSTITUTION_URLS[i]);
+            Institution institution = new Institution(INSTITUTION_NAMES[i], INSTITUTION_URLS[i]);
             for (int j = 0; j < INSTITUTION_ENDPOINTS.length; j++) {
                 institution.add(INSTITUTION_ENDPOINTS[i][j]);

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CenterRegistryI.java

-                      r3044
+                      r5035
     public void loadCQLInstitutions();
     public List<InstitutionI> getCQLInstitutions();
+    public List<Institution> getCQLInstitutions();
     public InstitutionI getCQLInstitution(int index);
+    public Institution getCQLInstitution(int index);
+}

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CenterRegistryLive.java

-                      r3044
+                      r5035
     //https://centerregistry-clarin.esc.rzg.mpg.de/restxml/
     private boolean hasInstitutionsLoaded = false;
     private List<InstitutionI> centers = new ArrayList<InstitutionI>();
+    private List<Institution> centers = new ArrayList<Institution>();
     @Override
 …
                 String institutionUrl = regCenter.getId();
                 String institutionName = regCenter.getCenterName();
                 InstitutionI institution = new Institution(institutionName, institutionUrl);
+                Institution institution = new Institution(institutionName, institutionUrl);
                 // display in the tree only those institutions that have CQL endpoints:
                 CenterProfile profile = connector.retrieveCenterProfile(regCenter);
 …
     @Override
     public List<InstitutionI> getCQLInstitutions() {
+    public List<Institution> getCQLInstitutions() {
         loadCQLInstitutions();
         return centers;
 …
     @Override
     public InstitutionI getCQLInstitution(int index) {
+    public Institution getCQLInstitution(int index) {
         loadCQLInstitutions();
         if (index >= centers.size()) {

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/Corpus.java

-                      r3058
+                      r5035
     public static final String ROOT_HANDLE = "root";
     public static final Pattern HANDLE_WITH_SPECIAL_CHARS = Pattern.compile(".*[<>=/()\\s].*");
     private InstitutionI institution;
+    private Institution institution;
     private String endpointUrl;
     private String handle;
 …
+    }
     public Corpus(InstitutionI institution, String endpointUrl) {
+    public Corpus(Institution institution, String endpointUrl) {
         this.institution = institution;
         this.endpointUrl = endpointUrl;
 …
+    }
     public InstitutionI getInstitution() {
+    public Institution getInstitution() {
         return institution;
+    }

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CorpusModelCached.java

-                      r5034
+                      r5035
 package eu.clarin.sru.fcs.aggregator.sopt;
 import eu.clarin.sru.fcs.aggregator.cache.ScanCacheI;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
 import java.util.ArrayList;
 import java.util.Arrays;
 …
     private static final Logger logger = Logger.getLogger(CorpusModelCached.class.getName());
     private Map<String, Set<Corpus>> selectedCorpora = new HashMap<String, Set<Corpus>>();
     private ScanCacheI cache;
+    private ScanCache cache;
     public CorpusModelCached(ScanCacheI cache) {
+    public CorpusModelCached(ScanCache cache) {
         super(new DefaultTreeNode(new Corpus(), new ArrayList<DefaultTreeNode<Corpus>>()));
         this.cache = cache;

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CorpusModelLive.java

-                      r3058
+                      r5035
 import eu.clarin.sru.fcs.aggregator.app.WebAppListener;
 import static eu.clarin.sru.fcs.aggregator.sopt.Corpus.ROOT_HANDLE;
 import eu.clarin.sru.fcs.aggregator.util.SRUCQLscan;
+import eu.clarin.sru.fcs.aggregator.util.SRUCQL;
 import java.util.ArrayList;
 import java.util.Arrays;
 …
     private void initRootChildren(CenterRegistryI startingPoint) {
         for (InstitutionI instit : startingPoint.getCQLInstitutions()) {
+        for (Institution instit : startingPoint.getCQLInstitutions()) {
             for (Endpoint endp : instit.getEndpoints()) {
                 try {
 …
                     Future<SRUScanResponse> corporaResponse = null;
                     SRUScanRequest corporaRequest = new SRUScanRequest(endp.getUrl());
                     StringBuilder scanClause = new StringBuilder(SRUCQLscan.RESOURCE_PARAMETER);
+                    StringBuilder scanClause = new StringBuilder(SRUCQL.SCAN_RESOURCE_PARAMETER);
                     scanClause.append("=");
                     scanClause.append(ROOT_HANDLE);
                     corporaRequest.setScanClause(scanClause.toString());
                     corporaRequest.setExtraRequestData(SRUCQLscan.RESOURCE_INFO_PARAMETER, "true");
+                    corporaRequest.setExtraRequestData(SRUCQL.SCAN_RESOURCE_INFO_PARAMETER, "true");
                     corporaResponse = sruClient.scan(corporaRequest);
                     SRUScanResponse response = corporaResponse.get(200, TimeUnit.SECONDS);
 …
         try {
             SRUScanRequest corporaRequest = new SRUScanRequest(corpus.getEndpointUrl());
             StringBuilder scanClause = new StringBuilder(SRUCQLscan.RESOURCE_PARAMETER);
+            StringBuilder scanClause = new StringBuilder(SRUCQL.SCAN_RESOURCE_PARAMETER);
             scanClause.append("=");
             String resourceValue = corpus.getHandle();

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/Endpoint.java

-                      r5034
+                      r5035
     private String url;
     private InstitutionI institution;
+    private Institution institution;
     public Endpoint(String url, InstitutionI institution) {
+    public Endpoint(String url, Institution institution) {
         this.url = url;
         this.institution = institution;
 …
+    }
     public InstitutionI getInstitution() {
+    public Institution getInstitution() {
         return institution;
+    }
     public void setInstitution(InstitutionI institution) {
+    public void setInstitution(Institution institution) {
         this.institution = institution;
+    }

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/Institution.java

-                      r5034
+                      r5035
  * @author Yana Panchenko
  */
     public class Institution implements InstitutionI {
+    public class Institution {
     private String name;
 …
+    }
-    @Override
     public Endpoint add(String endpointUrl) {
         Endpoint ep = getEndpoint(endpointUrl);
 …
+    }
-    @Override
     public String getName() {
         return name;
+    }
-    @Override
     public String getLink() {
         return link;
+    }
-    @Override
    public List<Endpoint> getEndpoints() {
         return  this.endpoints;
 …
-    @Override
     public Endpoint getEndpoint(int index) {
         if (index >= endpoints.size()) {
 …
+    }
-    @Override
     public Endpoint getEndpoint(String endpointUrl) {
         for (Endpoint ep : endpoints) {
 …
+    }
-    @Override
     public String toString() {
         if (name != null && name.length() > 0) {
 …
+    }
+}

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/Languages.java

-                      r3044
+                      r5035
 import java.io.BufferedReader;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.io.UnsupportedEncodingException;
-import java.util.Arrays;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 …
     private Map<String,String> code22Code = new HashMap<String,String>();
     public static final String LANGUAGES_FILE_PATH = "/lang/ISO-639-2_utf-8.txt";
+    public static final String LANGUAGES_FILE_ENCODING = "UTF-8";
+    public static final String ANY_LANGUAGE_NAME = "anylang";
     public Languages() {
 …
         BufferedReader br = null;
         try {
             br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+            br = new BufferedReader(new InputStreamReader(is, LANGUAGES_FILE_ENCODING));
             String line;
             while ((line = br.readLine()) != null) {

SRUAggregator/trunk/src/test/java/eu/clarin/sru/fcs/aggregator/app/ScanCacheFiledTest.java

-                      r5034
+                      r5035
 import eu.clarin.sru.fcs.aggregator.cache.ScanCacheFiled;
+import eu.clarin.sru.fcs.aggregator.cache.SimpleInMemScanCache;
 import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
 …
 public class ScanCacheFiledTest {
 //
 //
 //    @Test
 //    public void testReadWriteDepth1() {
 //        String scanDir = "/scan-bas";
 //        String scanPath1 = this.getClass().getResource(scanDir).getFile();
 //        String scanPath2 = "/tmp/scan-bas";
 //        File scanDir2 = new File(scanPath2);
 //        if (!scanDir2.exists()) {
 //            scanDir2.mkdir();
 //        }
 //
 //        ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
 //        ScanCache cacheOrig = scanFiled1.read();
 //
 //        ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
 //        scanFiled2.write(cacheOrig);
 //
 //        ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
 //        ScanCache cacheRewritten = scanFiled3.read();
 //
 //        //make sure caches contain the same info after read-write
 //        Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
 //        Endpoint epOrig = cacheOrig.getInstitutions().get(2).getEndpoint(0);
 //        Endpoint epRewritten = cacheRewritten.getInstitutions().get(2).getEndpoint(0);
 //        Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
 //        Assert.assertEquals(epOrig, epRewritten);
 //        List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
 //        List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
 //        Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
 //        Assert.assertEquals(3, rootCorporaRewritten.size());
 //        Assert.assertEquals(rootCorporaOrig.get(0), rootCorporaRewritten.get(0));
 //        List<Corpus> childenOrig = cacheOrig.getChildrenCorpora(rootCorporaOrig.get(0).getHandle());
 //        List<Corpus> childenRewritten = cacheRewritten.getChildrenCorpora(rootCorporaOrig.get(0).getHandle());
 //        Assert.assertEquals(childenOrig, childenRewritten);
 //        Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
 //
 //        //System.out.println(cacheOrig);
 //        //System.out.println();
 //        //System.out.println(cacheRewritten);
 //    }
 //
 //    @Test
 //    public void testReadWriteDepth2() {
 //        String scanDir = "/scan-mpi";
 //        String scanPath1 = this.getClass().getResource(scanDir).getFile();
 //        String scanPath2 = "/tmp/scan-mpi";
 //        File scanDir2 = new File(scanPath2);
 //        if (!scanDir2.exists()) {
 //            scanDir2.mkdir();
 //        }
 //
 //        ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
 //        ScanCache cacheOrig = scanFiled1.read();
 //
 //        ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
 //        scanFiled2.write(cacheOrig);
 //
 //        ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
 //        ScanCache cacheRewritten = scanFiled3.read();
 //
 //        //make sure caches contain the same info after read-write
 //        Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
 //        Endpoint epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
 //        Endpoint epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
 //        Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
 //        Assert.assertEquals(epOrig, epRewritten);
 //        List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
 //        List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
 //        Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
 //        Assert.assertEquals(3, rootCorporaRewritten.size());
 //        Assert.assertEquals(rootCorporaOrig.get(0), rootCorporaRewritten.get(0));
 //        List<Corpus> childenOrig = cacheOrig.getChildrenCorpora(rootCorporaOrig.get(0).getHandle());
 //        List<Corpus> childenRewritten = cacheRewritten.getChildrenCorpora(rootCorporaOrig.get(0).getHandle());
 //        Assert.assertEquals(childenOrig, childenRewritten);
 //        Assert.assertEquals(2, childenRewritten.size());
 //        Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
 //
 ////        System.out.println(cacheOrig);
 ////        System.out.println();
 ////        System.out.println(cacheRewritten);
 //    }
 //
 //        @Test
 //    public void testReadWriteDefaultCorpus() {
 //        String scanDir = "/scan-def";
 //        String scanPath1 = this.getClass().getResource(scanDir).getFile();
 //        String scanPath2 = "/tmp/scan-def";
 //        File scanDir2 = new File(scanPath2);
 //        if (!scanDir2.exists()) {
 //            scanDir2.mkdir();
 //        }
 //
 //        ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
 //        ScanCache cacheOrig = scanFiled1.read();
 //
 //        ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
 //        scanFiled2.write(cacheOrig);
 //
 //        ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
 //        ScanCache cacheRewritten = scanFiled3.read();
 //
 //        //make sure caches contain the same info after read-write
 //        Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
 //        Endpoint epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
 //        Endpoint epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
 //        Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
 //        Assert.assertEquals(epOrig, epRewritten);
 //        List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
 //        List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
 //        Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
 //        Assert.assertEquals(1, rootCorporaRewritten.size());
 //        Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
 //
 ////        System.out.println(cacheOrig);
 ////        System.out.println();
 ////        System.out.println(cacheRewritten);
 //    }
+    @Test
+    public void testReadWriteDepth1() {
+        String scanDir = "/scan-bas";
+        String scanPath1 = this.getClass().getResource(scanDir).getFile();
+        String scanPath2 = "/tmp/scan-bas";
+        File scanDir2 = new File(scanPath2);
+        if (!scanDir2.exists()) {
+            scanDir2.mkdir();
+        }
+        ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
+        ScanCache cacheOrig = scanFiled1.read();
+        ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
+        scanFiled2.write(cacheOrig);
+        ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
+        ScanCache cacheRewritten = scanFiled3.read();
+        //make sure caches contain the same info after read-write
+        Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
+        Endpoint epOrig = cacheOrig.getInstitutions().get(2).getEndpoint(0);
+        Endpoint epRewritten = cacheRewritten.getInstitutions().get(2).getEndpoint(0);
+        Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
+        Assert.assertEquals(epOrig, epRewritten);
+        List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
+        List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
+        Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
+        Assert.assertEquals(3, rootCorporaRewritten.size());
+        Assert.assertEquals(rootCorporaOrig.get(0), rootCorporaRewritten.get(0));
+        List<Corpus> childenOrig = cacheOrig.getChildren(rootCorporaOrig.get(0));
+        List<Corpus> childenRewritten = cacheRewritten.getChildren(rootCorporaOrig.get(0));
+        Assert.assertEquals(childenOrig, childenRewritten);
+        Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
+        //System.out.println(cacheOrig);
+        //System.out.println();
+        //System.out.println(cacheRewritten);
+    }
+    @Test
+    public void testReadWriteDepth2() {
+        String scanDir = "/scan-mpi";
+        String scanPath1 = this.getClass().getResource(scanDir).getFile();
+        String scanPath2 = "/tmp/scan-mpi";
+        File scanDir2 = new File(scanPath2);
+        if (!scanDir2.exists()) {
+            scanDir2.mkdir();
+        }
+        ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
+        ScanCache cacheOrig = scanFiled1.read();
+        ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
+        scanFiled2.write(cacheOrig);
+        ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
+        ScanCache cacheRewritten = scanFiled3.read();
+        //make sure caches contain the same info after read-write
+        Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
+        Endpoint epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
+        Endpoint epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
+        Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
+        Assert.assertEquals(epOrig, epRewritten);
+        List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
+        List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
+        Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
+        Assert.assertEquals(3, rootCorporaRewritten.size());
+        Assert.assertEquals(rootCorporaOrig.get(0), rootCorporaRewritten.get(0));
+        List<Corpus> childenOrig = cacheOrig.getChildren(rootCorporaOrig.get(0));
+        List<Corpus> childenRewritten = cacheRewritten.getChildren(rootCorporaOrig.get(0));
+        Assert.assertEquals(childenOrig, childenRewritten);
+        Assert.assertEquals(2, childenRewritten.size());
+        Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
+//        System.out.println(cacheOrig);
+//        System.out.println();
+//        System.out.println(cacheRewritten);
+    }
+        @Test
+    public void testReadWriteDefaultCorpus() {
+        String scanDir = "/scan-def";
+        String scanPath1 = this.getClass().getResource(scanDir).getFile();
+        String scanPath2 = "/tmp/scan-def";
+        File scanDir2 = new File(scanPath2);
+        if (!scanDir2.exists()) {
+            scanDir2.mkdir();
+        }
+        ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
+        ScanCache cacheOrig = scanFiled1.read();
+        ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
+        scanFiled2.write(cacheOrig);
+        ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
+        ScanCache cacheRewritten = scanFiled3.read();
+        //make sure caches contain the same info after read-write
+        Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
+        Endpoint epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
+        Endpoint epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
+        Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
+        Assert.assertEquals(epOrig, epRewritten);
+        List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
+        List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
+        Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
+        Assert.assertEquals(1, rootCorporaRewritten.size());
+        Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
+//        System.out.println(cacheOrig);
+//        System.out.println();
+//        System.out.println(cacheRewritten);
+    }
         @Test

SRUAggregator/trunk/src/test/java/eu/clarin/sru/fcs/aggregator/app/ScanCrawlerTest.java

-                      r5034
+                      r5035
 import eu.clarin.sru.client.SRUThreadedClient;
+import eu.clarin.sru.fcs.aggregator.cache.EndpointUrlFilter;
+import eu.clarin.sru.fcs.aggregator.cache.SimpleInMemScanCache;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCrawler;
 import eu.clarin.sru.fcs.aggregator.sopt.CenterRegistryLive;
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
 …
     public void testCrawlForMpiAndTue() {
+//        SRUThreadedClient sruClient = new SRUThreadedClient();
+//
+//        try {
+//            EndpointUrlFilter filter = new EndpointUrlFilter();
+//            filter.urlShouldContainAnyOf("uni-tuebingen.de", "mpi.nl");
+//            ScanCrawler crawler = new ScanCrawler(new CenterRegistryLive(), sruClient, filter, 2);
+//            ScanCache cache = crawler.crawl();
+//            Corpus tueRootCorpus = cache.getRootCorporaOfEndpoint("http://weblicht.sfs.uni-tuebingen.de/rws/sru/").get(0);
+//            Corpus mpiRootCorpus = cache.getRootCorporaOfEndpoint("http://cqlservlet.mpi.nl/").get(0);
+//            Assert.assertEquals("http://hdl.handle.net/11858/00-1778-0000-0001-DDAF-D",
+//                    tueRootCorpus.getHandle());
+//            Assert.assertEquals("hdl:1839/00-0000-0000-0003-4692-D@format=cmdi",
+//                    cache.getChildrenCorpora("hdl:1839/00-0000-0000-0001-53A5-2@format=cmdi").get(0).getHandle());
+//            //check if languages and other corpus data is crawled correctly...
+//            Set<String> tueLangs = new HashSet<String>();
+//            tueLangs.add("deu");
+//            Assert.assertEquals(tueLangs, tueRootCorpus.getLanguages());
+//            String tueDescSubstring = "Tübingen Treebank";
+//            Assert.assertTrue("Description problem", tueRootCorpus.getDescription().contains(tueDescSubstring));
+//            String tueNameSubstring = "TuebaDDC";
+//            Assert.assertTrue("Name problem", tueRootCorpus.getDisplayName().contains(tueNameSubstring));
+//            String tuePageSubstring = "sfs.uni-tuebingen.de";
+//            Assert.assertTrue("Landing page problem", tueRootCorpus.getLandingPage().contains(tuePageSubstring));
+//            Assert.assertTrue("Number of records problem", mpiRootCorpus.getNumberOfRecords() > 10);
+//
+//        } finally {
+//            sruClient.shutdown();
+//        }
+//
+        SRUThreadedClient sruClient = new SRUThreadedClient();
+        try {
+            EndpointUrlFilter filter = new EndpointUrlFilter();
+            //filter.urlShouldContainAnyOf("leipzig", ".mpi.nl");
+            filter.urlShouldContainAnyOf("uni-tuebingen.de", ".mpi.nl");
+            //filter.urlShouldContainAnyOf("dspin.dwds.de", "lindat.");
+            ScanCrawler crawler = new ScanCrawler(new CenterRegistryLive(), sruClient, filter, 2);
+            SimpleInMemScanCache cache = new SimpleInMemScanCache();
+            crawler.crawl(cache);
+            Corpus tueRootCorpus = cache.getRootCorporaOfEndpoint("http://weblicht.sfs.uni-tuebingen.de/rws/sru/").get(0);
+            Corpus mpiRootCorpus = cache.getRootCorporaOfEndpoint("http://cqlservlet.mpi.nl/").get(0);
+            Assert.assertEquals("http://hdl.handle.net/11858/00-1778-0000-0001-DDAF-D",
+                    tueRootCorpus.getHandle());
+            Corpus mpiCorpus = cache.getCorpus("hdl:1839/00-0000-0000-0001-53A5-2@format=cmdi");
+            Assert.assertEquals("hdl:1839/00-0000-0000-0003-4692-D@format=cmdi", cache.getChildren(mpiCorpus).get(0).getHandle());
+            //check if languages and other corpus data is crawled correctly...
+            Set<String> tueLangs = new HashSet<String>();
+            tueLangs.add("deu");
+            Assert.assertEquals(tueLangs, tueRootCorpus.getLanguages());
+            String tueDescSubstring = "Tübingen Treebank";
+            Assert.assertTrue("Description problem", tueRootCorpus.getDescription().contains(tueDescSubstring));
+            String tueNameSubstring = "TuebaDDC";
+            Assert.assertTrue("Name problem", tueRootCorpus.getDisplayName().contains(tueNameSubstring));
+            String tuePageSubstring = "sfs.uni-tuebingen.de";
+            Assert.assertTrue("Landing page problem", tueRootCorpus.getLandingPage().contains(tuePageSubstring));
+            Assert.assertTrue("Number of records problem", mpiRootCorpus.getNumberOfRecords() > 10);
+        } finally {
+            sruClient.shutdown();
+        }
+    }
+}

Context Navigation

Legend:

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/Aggregator.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/ControlsVisibility.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/SearchOptions.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/SearchResults.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/WebAppListener.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/EndpointFilter.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/EndpointUrlFilter.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCache.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCacheFiled.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCrawlTask.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCrawler.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/SimpleInMemScanCache.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/rest/AggregatorService.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CenterRegistryForTesting.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CenterRegistryI.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CenterRegistryLive.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/Corpus.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CorpusModelCached.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/CorpusModelLive.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/Endpoint.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/Institution.java

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/sopt/Languages.java

SRUAggregator/trunk/src/test/java/eu/clarin/sru/fcs/aggregator/app/ScanCacheFiledTest.java

SRUAggregator/trunk/src/test/java/eu/clarin/sru/fcs/aggregator/app/ScanCrawlerTest.java

Download in other formats: