Context Navigation

← Previous Change
Next Change →

Changeset 5720 for SRUAggregator

Timestamp:

10/16/14 07:56:31 (10 years ago)

Author:

emanuel.dima@uni-tuebingen.de

Message:

Aggregator v2, major refactoring: removed ZK, split into a REST backend and a react+bootstrap-based javascript frontend; WIP

Location:

SRUAggregator/trunk

Files:

: 23 added
: 19 deleted
: 13 edited
: 2 moved

build.sh (added)
nb-configuration.xml (modified) (1 diff)
pom.xml (modified) (1 diff)
src/main/java/eu/clarin/sru/fcs/aggregator/app/Aggregator.java (modified) (1 diff)
src/main/java/eu/clarin/sru/fcs/aggregator/app/ControlsVisibility.java (deleted)
src/main/java/eu/clarin/sru/fcs/aggregator/app/DesktopDestroyedListener.java (deleted)
src/main/java/eu/clarin/sru/fcs/aggregator/app/PagesVisibility.java (deleted)
src/main/java/eu/clarin/sru/fcs/aggregator/app/SearchOptions.java (deleted)
src/main/java/eu/clarin/sru/fcs/aggregator/app/SearchResults.java (deleted)
src/main/java/eu/clarin/sru/fcs/aggregator/app/WebAppListener.java (deleted)
src/main/java/eu/clarin/sru/fcs/aggregator/cache/EndpointFilter.java (modified) (1 diff)
src/main/java/eu/clarin/sru/fcs/aggregator/cache/EndpointUrlFilter.java (modified) (1 diff)
src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCache.java (modified) (1 diff)
src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCacheFile.java (moved) (moved from SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCacheFiled.java) (4 diffs)
src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCrawlTask.java (modified) (3 diffs)
src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCrawler.java (modified) (2 diffs)
src/main/java/eu/clarin/sru/fcs/aggregator/cache/SimpleInMemScanCache.java (modified) (3 diffs)
src/main/java/eu/clarin/sru/fcs/aggregator/rest/AggregatedEndpoint.java (deleted)
src/main/java/eu/clarin/sru/fcs/aggregator/rest/AggregatorService.java (deleted)
src/main/java/eu/clarin/sru/fcs/aggregator/rest/RestService.java (added)
src/main/java/eu/clarin/sru/fcs/aggregator/util/SRUCQL.java (modified) (1 diff)
src/main/java/eu/clarin/sru/fcs/aggregator/util/ZKComp.java (deleted)
src/main/webapp/CLARIN.css (added)
src/main/webapp/META-INF/MANIFEST.MF (deleted)
src/main/webapp/META-INF/context.xml (modified) (1 diff)
src/main/webapp/WEB-INF/web.xml (modified) (1 diff)
src/main/webapp/WEB-INF/zk.xml (deleted)
src/main/webapp/about-fcs.html (deleted)
src/main/webapp/about.zul (deleted)
src/main/webapp/base.css (added)
src/main/webapp/fonts (added)
src/main/webapp/fonts/Roboto_with_European_character_sets.woff (added)
src/main/webapp/fonts/glyphicons-halflings-regular.eot (added)
src/main/webapp/fonts/glyphicons-halflings-regular.svg (added)
src/main/webapp/fonts/glyphicons-halflings-regular.ttf (added)
src/main/webapp/fonts/glyphicons-halflings-regular.woff (added)
src/main/webapp/help-fcs.html (deleted)
src/main/webapp/help.zul (deleted)
src/main/webapp/img (added)
src/main/webapp/img/clarindLogo.png (added)
src/main/webapp/index.html (added)
src/main/webapp/index.zul (deleted)
src/main/webapp/js (added)
src/main/webapp/js/components.js (added)
src/main/webapp/js/components.jsx (added)
src/main/webapp/js/main.js (added)
src/main/webapp/js/main.jsx (added)
src/main/webapp/lib (added)
src/main/webapp/lib/bootstrap.min.css (added)
src/main/webapp/lib/jquery-1.10.0.min.js (added)
src/main/webapp/lib/react-bootstrap.min.js (added)
src/main/webapp/lib/react-with-addons-0.11.2.js (added)
src/main/webapp/searchOptions.zul (deleted)
src/main/webapp/searchResults.zul (deleted)
src/main/webapp/timeout.zul (deleted)
src/test/java/eu/clarin/sru/fcs/aggregator/app/ScanCacheFileTest.java (moved) (moved from SRUAggregator/trunk/src/test/java/eu/clarin/sru/fcs/aggregator/app/ScanCacheFiledTest.java) (3 diffs)
src/test/java/eu/clarin/sru/fcs/aggregator/app/ScanCrawlerTest.java (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

SRUAggregator/trunk/nb-configuration.xml

r5291	r5720
14	14	Any value defined here will override the pom.xml file value but is only applicable to the current project.
15	15	-->
	16	<org-netbeans-modules-maven-j2ee.netbeans_2e_hint_2e_deploy_2e_server>Tomcat</org-netbeans-modules-maven-j2ee.netbeans_2e_hint_2e_deploy_2e_server>
16	17	<org-netbeans-modules-maven-jaxws.rest_2e_config_2e_type>ide</org-netbeans-modules-maven-jaxws.rest_2e_config_2e_type>
17	18	</properties>

SRUAggregator/trunk/pom.xml

-                      r5291
+                      r5720
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>eu.clarin.sru.fcs</groupId>
+    <artifactId>aggregator</artifactId>
+    <version>1.5-SNAPSHOT</version>
+    <properties>
+        <endorsed.dir>${project.build.directory}/endorsed</endorsed.dir>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <slf4j.version>1.7.2</slf4j.version>
+        <httpClient.version>4.2.5</httpClient.version>
+    </properties>
+    <packaging>war</packaging>
+    <name>Aggregator</name>
+    <description>The Aggregator Project</description>
+    <organization>
+        <name>SfS Uni Tuebingen</name>
+        <url>http://www.sfs.uni-tuebingen.de/</url>
+    </organization>
+    <licenses>
+        <license>
+            <name>GNU GENERAL PUBLIC LICENSE, Version 3</name>
+            <url>http://www.gnu.org/licenses/gpl.html</url>
+            <distribution>repo</distribution>
+        </license>
+    </licenses>
+    <repositories>
+        <repository>
+            <id>clarin</id>
+            <url>http://catalog.clarin.eu/ds/nexus/content/repositories/Clarin/</url>
+        </repository>
+        <repository>
+            <id>sardine-google-svn-repo</id>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+            <name>Sardine maven repo at Google Code</name>
+            <url>http://sardine.googlecode.com/svn/maven/</url>
+        </repository>
+    </repositories>
+    <dependencies>
+        <dependency>
+            <groupId>org.zkoss.theme</groupId>
+            <artifactId>silvertail</artifactId>
+            <version>6.5.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.zkoss.zk</groupId>
+            <artifactId>zkplus</artifactId>
+            <version>6.5.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.zkoss.zk</groupId>
+            <artifactId>zhtml</artifactId>
+            <version>6.5.1</version>
+        </dependency>
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+            <version>2.2.2</version>
+        </dependency>
+        <dependency>
+            <groupId>eu.clarin.sru</groupId>
+            <artifactId>sru-client</artifactId>
+            <version>0.9.2</version>
+        </dependency>
+        <dependency>
+            <groupId>eu.clarin.weblicht</groupId>
+            <artifactId>wlfxb</artifactId>
+            <version>1.2.9</version>
+        </dependency>
+        <dependency>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>jersey-client</artifactId>
+            <version>1.17.1</version>
+        </dependency>
+        <dependency>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>jersey-servlet</artifactId>
+            <version>1.17.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.opennlp</groupId>
+            <artifactId>opennlp-tools</artifactId>
+            <version>1.5.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>3.10-beta2</version>
+        </dependency>
+        <dependency>
+            <groupId>com.googlecode.sardine</groupId>
+            <artifactId>sardine</artifactId>
+            <version>314</version>
+            <type>jar</type>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.httpcomponents</groupId>
+                    <artifactId>httpcore</artifactId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>commons-codec</artifactId>
+                    <groupId>commons-codec</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-jdk14</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>eu.clarin.weblicht</groupId>
+            <artifactId>connectors</artifactId>
+            <version>1.0.6</version>
+        </dependency>
+        <dependency>
+            <groupId>eu.clarin.weblicht</groupId>
+            <artifactId>bindings</artifactId>
+            <version>1.0.4</version>
+        </dependency>
+        <dependency>
+            <groupId>joda-time</groupId>
+            <artifactId>joda-time</artifactId>
+            <version>2.2</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.10</version>
+            <scope>test</scope>
+            <type>jar</type>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-all</artifactId>
+            <version>1.9.5</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>javax.servlet</groupId>
+            <artifactId>javax.servlet-api</artifactId>
+            <version>3.1.0</version>
+            <scope>provided</scope>
+        </dependency>
+    </dependencies>
+    <dependencyManagement>
+        <dependencies>
+            <dependency>
+                <groupId>org.slf4j</groupId>
+                <artifactId>slf4j-api</artifactId>
+                <version>${slf4j.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.slf4j</groupId>
+                <artifactId>slf4j-jdk14</artifactId>
+                <version>${slf4j.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.httpcomponents</groupId>
+                <artifactId>httpclient</artifactId>
+                <version>${httpClient.version}</version>
+            </dependency>
+        </dependencies>
+    </dependencyManagement>
+    <build>
+                 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+        <modelVersion>4.0.0</modelVersion>
+        <groupId>eu.clarin.sru.fcs</groupId>
+        <artifactId>Aggregator2</artifactId>
+        <version>2.0.0-ALPHA</version>
+        <packaging>war</packaging>
+        <name>Aggregator2</name>
+        <properties>
+                <endorsed.dir>${project.build.directory}/endorsed</endorsed.dir>
+                <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        </properties>
+        <repositories>
+                <repository>
+                        <id>clarin</id>
+                        <url>http://catalog.clarin.eu/ds/nexus/content/repositories/Clarin/</url>
+                </repository>
+                <repository>
+                        <id>sardine-google-svn-repo</id>
+                        <snapshots>
+                                <enabled>true</enabled>
+                        </snapshots>
+                        <name>Sardine maven repo at Google Code</name>
+                        <url>http://sardine.googlecode.com/svn/maven/</url>
+                </repository>
+        </repositories>
+        <dependencies>
+                <dependency>
+                        <groupId>eu.clarin.sru</groupId>
+                        <artifactId>sru-client</artifactId>
+                        <version>0.9.2</version>
+                </dependency>
+                <dependency>
+                        <groupId>eu.clarin.weblicht</groupId>
+                        <artifactId>wlfxb</artifactId>
+                        <version>1.2.9</version>
+                </dependency>
+                <dependency>
+                        <groupId>eu.clarin.weblicht</groupId>
+                        <artifactId>connectors</artifactId>
+                        <version>1.0.6</version>
+                </dependency>
+                <dependency>
+                        <groupId>eu.clarin.weblicht</groupId>
+                        <artifactId>bindings</artifactId>
+                        <version>1.0.4</version>
+                </dependency>
+                <dependency>
+                        <groupId>org.apache.opennlp</groupId>
+                        <artifactId>opennlp-tools</artifactId>
+                        <version>1.5.3</version>
+                </dependency>
+                <dependency>
+                        <groupId>com.googlecode.sardine</groupId>
+                        <artifactId>sardine</artifactId>
+                        <version>314</version>
+                        <type>jar</type>
+                        <exclusions>
+                                <exclusion>
+                                        <groupId>org.apache.httpcomponents</groupId>
+                                        <artifactId>httpcore</artifactId>
+                                </exclusion>
+                                <exclusion>
+                                        <artifactId>commons-codec</artifactId>
+                                        <groupId>commons-codec</groupId>
+                                </exclusion>
+                        </exclusions>
+                </dependency>
+                <dependency>
+                        <groupId>org.apache.poi</groupId>
+                        <artifactId>poi-ooxml</artifactId>
+                        <version>3.10-beta2</version>
+                </dependency>
+                <dependency>
+                        <groupId>joda-time</groupId>
+                        <artifactId>joda-time</artifactId>
+                        <version>2.2</version>
+                </dependency>
+                <dependency>
+                        <groupId>junit</groupId>
+                        <artifactId>junit</artifactId>
+                        <version>4.10</version>
+                        <scope>test</scope>
+                        <type>jar</type>
+                </dependency>
+                <dependency>
+                        <groupId>javax</groupId>
+                        <artifactId>javaee-web-api</artifactId>
+                        <version>6.0</version>
+                        <scope>provided</scope>
+                </dependency>
+                <dependency>
+                        <groupId>com.sun.jersey</groupId>
+                        <artifactId>jersey-server</artifactId>
+                        <version>1.18.1</version>
+                </dependency>
+                <dependency>
+                        <groupId>com.sun.jersey</groupId>
+                        <artifactId>jersey-servlet</artifactId>
+                        <version>1.18.1</version>
+                </dependency>
+                <dependency>
+                        <groupId>com.sun.jersey</groupId>
+                        <artifactId>jersey-client</artifactId>
+                        <version>1.18.1</version>
+                </dependency>
+                <dependency>
+                        <groupId>com.sun.jersey</groupId>
+                        <artifactId>jersey-core</artifactId>
+                        <version>1.18.1</version>
+                </dependency>
+                <dependency>
+                        <groupId>com.sun.jersey</groupId>
+                        <artifactId>jersey-json</artifactId>
+                        <version>1.18.1</version>
+                </dependency>
+                <dependency>
+                        <groupId>com.sun.jersey.contribs</groupId>
+                        <artifactId>jersey-multipart</artifactId>
+                        <version>1.18.1</version>
+                </dependency>
+        </dependencies>
+        <build>
         <finalName>${project.artifactId}</finalName>
         <plugins>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-compiler-plugin</artifactId>
                 <version>2.3.2</version>
                 <configuration>
                     <source>1.6</source>
                     <target>1.6</target>
                     <compilerArguments>
                         <endorseddirs>${endorsed.dir}</endorseddirs>
                     </compilerArguments>
                 </configuration>
             </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-war-plugin</artifactId>
                 <version>2.1.1</version>
                 <configuration>
                     <failOnMissingWebXml>false</failOnMissingWebXml>
                 </configuration>
             </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-dependency-plugin</artifactId>
                 <version>2.1</version>
                 <executions>
                     <execution>
                         <phase>validate</phase>
                         <goals>
                             <goal>copy</goal>
                         </goals>
                         <configuration>
                             <outputDirectory>${endorsed.dir}</outputDirectory>
                             <silent>true</silent>
                             <artifactItems>
                                 <artifactItem>
                                     <groupId>javax</groupId>
                                     <artifactId>javaee-endorsed-api</artifactId>
                                     <version>6.0</version>
                                     <type>jar</type>
                                 </artifactItem>
                             </artifactItems>
                         </configuration>
                     </execution>
                 </executions>
             </plugin>
         </plugins>
     </build>
+                <plugins>
+                        <plugin>
+                                <groupId>org.apache.maven.plugins</groupId>
+                                <artifactId>maven-compiler-plugin</artifactId>
+                                <version>2.3.2</version>
+                                <configuration>
+                                        <source>1.6</source>
+                                        <target>1.6</target>
+                                        <compilerArguments>
+                                                <endorseddirs>${endorsed.dir}</endorseddirs>
+                                        </compilerArguments>
+                                </configuration>
+                        </plugin>
+                        <plugin>
+                                <groupId>org.apache.maven.plugins</groupId>
+                                <artifactId>maven-war-plugin</artifactId>
+                                <version>2.1.1</version>
+                                <configuration>
+                                        <failOnMissingWebXml>false</failOnMissingWebXml>
+                                </configuration>
+                        </plugin>
+                        <plugin>
+                                <groupId>org.apache.maven.plugins</groupId>
+                                <artifactId>maven-dependency-plugin</artifactId>
+                                <version>2.1</version>
+                                <executions>
+                                        <execution>
+                                                <phase>validate</phase>
+                                                <goals>
+                                                        <goal>copy</goal>
+                                                </goals>
+                                                <configuration>
+                                                        <outputDirectory>${endorsed.dir}</outputDirectory>
+                                                        <silent>true</silent>
+                                                        <artifactItems>
+                                                                <artifactItem>
+                                                                        <groupId>javax</groupId>
+                                                                        <artifactId>javaee-endorsed-api</artifactId>
+                                                                        <version>6.0</version>
+                                                                        <type>jar</type>
+                                                                </artifactItem>
+                                                        </artifactItems>
+                                                </configuration>
+                                        </execution>
+                                </executions>
+                        </plugin>
+                </plugins>
+        </build>
 </project>

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/app/Aggregator.java

-                      r5041
+                      r5720
 package eu.clarin.sru.fcs.aggregator.app;
+import eu.clarin.sru.fcs.aggregator.search.Search;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCrawlTask;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCrawler;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCacheFile;
+import eu.clarin.sru.fcs.aggregator.cache.SimpleInMemScanCache;
+import eu.clarin.sru.client.SRUThreadedClient;
+import eu.clarin.sru.client.SRUVersion;
+import eu.clarin.sru.client.fcs.ClarinFCSRecordParser;
+import eu.clarin.sru.fcs.aggregator.cache.EndpointUrlFilter;
+import eu.clarin.sru.fcs.aggregator.registry.CenterRegistryLive;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
+import eu.clarin.sru.fcs.aggregator.registry.Corpus;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.logging.*;
+import org.zkoss.zk.ui.Component;
+import org.zkoss.zk.ui.Executions;
+import org.zkoss.zk.ui.event.Event;
+import org.zkoss.zk.ui.select.SelectorComposer;
+import org.zkoss.zk.ui.select.annotation.Listen;
+import org.zkoss.zk.ui.select.annotation.Wire;
+import org.zkoss.zul.Label;
+import org.zkoss.zul.Messagebox;
+import org.zkoss.zul.Textbox;
+import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
+import eu.clarin.sru.fcs.aggregator.sopt.Languages;
+import eu.clarin.sru.fcs.aggregator.util.SRUCQL;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 import javax.naming.InitialContext;
 import javax.naming.NamingException;
+import org.zkoss.zul.A;
+import org.zkoss.zul.Div;
+import org.zkoss.zul.Menubar;
+import org.zkoss.zul.Menuitem;
+import org.zkoss.zul.North;
+import org.zkoss.zul.Popup;
+import org.zkoss.zul.Progressmeter;
+import org.zkoss.zul.South;
+import javax.servlet.ServletContextEvent;
+import javax.servlet.ServletContextListener;
+import opennlp.tools.tokenize.TokenizerModel;
 /**
+ * Main component of the Aggregator application intended to provide
+ * users access to CLARIN-FCS resources.
+ *
+ * The webapp base URL corresponds to the default behavior of displaying
+ * the main aggregator page, where the user can enter query, select the
+ * resources of CQL endpoints (as specified in the Clarin center registry),
+ * and search in these resources. The endpoints/resources selection is
+ * optional, by default all the endpoints root resources are selected.
+ *
+ * If invoked with 'x-aggregation-context' and 'query' parameter,
+ * the aggregator will pre-select provided resources and fill in the query field.
+ * This mechanism is currently used by VLO.
+ * Example:
+ * POST http://weblicht.sfs.uni-tuebingen.de/Aggregator HTTP/1.1
+ * operation = searchRetrieve &
+ * version = 1.2 &
+ * query = bellen &
+ * x-aggregation-context = {"http://fedora.clarin-d.uni-saarland.de/sru/":["hdl:11858/00-246C-0000-0008-5F2A-0"]}
+ *
+ *
+ * Additionally, if run with a URL query string parameter 'mode', the
+ * Main component of the Aggregator application intended to provide users access
+ * to CLARIN-FCS resources.
+ *
+ * The webapp base URL corresponds to the default behavior of displaying the
+ * main aggregator page, where the user can enter query, select the resources of
+ * CQL endpoints (as specified in the Clarin center registry), and search in
+ * these resources. The endpoints/resources selection is optional, by default
+ * all the endpoints root resources are selected.
+ *
+ * If invoked with 'x-aggregation-context' and 'query' parameter, the aggregator
+ * will pre-select provided resources and fill in the query field. This
+ * mechanism is currently used by VLO. Example: POST
+ * http://weblicht.sfs.uni-tuebingen.de/Aggregator HTTP/1.1 operation =
+ * searchRetrieve & version = 1.2 & query = bellen & x-aggregation-context =
+ * {"http://fedora.clarin-d.uni-saarland.de/sru/":["hdl:11858/00-246C-0000-0008-5F2A-0"]}
+ *
+ *
+ * Additionally, if run with a URL query string parameter 'mode', the
  * special behavior of the aggregator is triggered:
+ *
+ * /?mode=testing
+ * corresponds to the mode where the CQL endpoints are taken not from Clarin
+ * center repository, but from a hard-coded endpoints list; this functionality
+ * is useful for testing the development instances of endpoints, before they
+ * are moved to production. Was done to meet the request from MPI.
+ *
+ * /?mode=search
+ * corresponds to the mode where the aggregator page is requested with the
+ * already known query and (optionally) resources to search in, and if the
+ * immediate search is desired. In this case the aggregator search results
+ * page is displayed and search results of the provided query start to fill
+ * it in immediately (i.e. users don't need to click 'search' in the aggregator
+ * page). Was done to meet the request from CLARIN ERIC (Martin Wynne
+ * contacted us).
+ *
+ * /?mode=live
+ * corresponds to the mode where the information about corpora are taken not
+ * from the scan cache (crawled in advance), but loaded live, starting from
+ * the request to center registry and then performing scan operation requests on
+ * each CQL  endpoint listed there. It takes time to get the corresponding
+ * responses from the endpoints, therefore the Aggregator page loads very slowly
+ * in this mode. But this mode is useful for testing of the newly added or
+ * changed corpora without waiting for the next crawl.
+ *
+ *
+ *
+ * /?mode=testing corresponds to the mode where the CQL endpoints are taken not
+ * from Clarin center repository, but from a hard-coded endpoints list; this
+ * functionality is useful for testing the development instances of endpoints,
+ * before they are moved to production. Was done to meet the request from MPI.
+ *
+ * /?mode=search corresponds to the mode where the aggregator page is requested
+ * with the already known query and (optionally) resources to search in, and if
+ * the immediate search is desired. In this case the aggregator search results
+ * page is displayed and search results of the provided query start to fill it
+ * in immediately (i.e. users don't need to click 'search' in the aggregator
+ * page). Was done to meet the request from CLARIN ERIC (Martin Wynne contacted
+ * us).
+ *
+ * /?mode=live corresponds to the mode where the information about corpora are
+ * taken not from the scan cache (crawled in advance), but loaded live, starting
+ * from the request to center registry and then performing scan operation
+ * requests on each CQL endpoint listed there. It takes time to get the
+ * corresponding responses from the endpoints, therefore the Aggregator page
+ * loads very slowly in this mode. But this mode is useful for testing of the
+ * newly added or changed corpora without waiting for the next crawl.
+ *
+ *
+ * Adds Application initialization and clean up: only one SRU threaded client is
+ * used in the application, it has to be shut down when the application stops.
+ * One Languages object instance is used within the application.
+ *
  * @author Yana Panchenko
+ * @author edima
  */
+public class Aggregator extends SelectorComposer<Component> {
+    private static final Logger LOGGER = Logger.getLogger(Aggregator.class.getName());
+    @Wire
+    private Textbox searchString;
+    @Wire
+    private Popup wspaceSigninpop;
+    @Wire
+    private Textbox wspaceUserName;
+    @Wire
+    private Textbox wspaceUserPwd;
+    private int exportDataType = 1;
+    @Wire
+    private Div aboutDiv;
+    @Wire
+    private Label aboutLabel;
+    @Wire
+    private Div soDiv;
+    private SearchOptions searchOptionsComposer;
+    @Wire
+    private Label soLabel;
+    @Wire
+    private Div srDiv;
+    private SearchResults searchResultsComposer;
+    @Wire
+    private Label srLabel;
+    @Wire
+    private Div helpDiv;
+    @Wire
+    private Label helpLabel;
+    @Wire
+    private Progressmeter pMeter;
+    @Wire
+    private Menubar menubar;
+    @Wire
+    private North controls1;
+    @Wire
+    private South controls2;
+    @Wire
+    private A prevButton;
+    @Wire
+    private A nextButton;
+    @Wire
+    private Label tooltipPrevText;
+    @Wire
+    private Label tooltipNextText;
+    @Wire
+    private Menuitem weblichtTcf;
+    private int[] searchOffset = new int[]{1, 0}; // start and size
+    private ControlsVisibility controlsVisibility;
+    private PagesVisibility pagesVisibility;
+    private String weblichtUrl; // defined in web.xml
+    public static final String MODE_PARAM = "mode";
+    public static final String MODE_PARAM_VALUE_TEST = "testing";
+    public static final String MODE_PARAM_VALUE_SEARCH = "search";
+    public static final String MODE_PARAM_VALUE_LIVE = "live";
+    @Override
+    public void doAfterCompose(Component comp) throws Exception {
+        super.doAfterCompose(comp);
+        processContext();
+        processParameters();
+        searchOptionsComposer = (SearchOptions) soDiv.getChildren().get(0).getChildren().get(0).getAttribute("$" + SearchOptions.class.getSimpleName());
+        searchOptionsComposer.setAggregatorController(this);
+        searchResultsComposer = (SearchResults) srDiv.getChildren().get(0).getChildren().get(0).getAttribute("$" + SearchResults.class.getSimpleName());
+        pagesVisibility = new PagesVisibility(aboutDiv, aboutLabel, soDiv, soLabel, srDiv, srLabel, helpDiv, helpLabel);
+        controlsVisibility = new ControlsVisibility(controls1, controls2, pMeter, menubar, prevButton, nextButton);
+        searchResultsComposer.setVisibilityControllers(pagesVisibility, controlsVisibility);
+    }
+    @Listen("onClick = #searchButton")
+    public void onExecuteSearch(Event ev) {
+        Map<String, Set<Corpus>> selectedCorpora = searchOptionsComposer.getSelectedCorpora();
+        boolean emptyCorpora = true;
+        for (Set<Corpus> corpora : selectedCorpora.values()) {
+            if (!corpora.isEmpty()) {
+                emptyCorpora = false;
+                break;
+            }
+        }
+        if (emptyCorpora) {
+            Messagebox.show("No corpora is selected. To perform the search, please select corus/corpora of interest by checking the corpora checkboxes.", "FCS", 0, Messagebox.INFORMATION);
+        } else if (searchString.getText().isEmpty()) {
+            Messagebox.show("No query is specified. To perform the search, please enter a keyword of interest in the search input field, e.g. Elefant, and press the 'Search' button.", "FCS", 0, Messagebox.INFORMATION);
+        } else {
+            int maxRecords = searchOptionsComposer.getMaxRecords();
+            String searchLang = searchOptionsComposer.getSearchLang();
+            //searchOffset = new int[]{1, 0};
+            searchOffset = new int[]{1, 0};
+            searchOffset[0] = searchOffset[0] + searchOffset[1];
+            searchOffset[1] = maxRecords;
+            searchResultsComposer.executeSearch(selectedCorpora, searchOffset[0], maxRecords, searchString.getText(), searchLang);
+            if (searchLang.equals(Languages.ANY_LANGUAGE_NAME)) {
+                this.weblichtTcf.setVisible(false);
+            } else {
+                this.weblichtTcf.setVisible(true);
+            }
+            onClickSearchResult(null);
+        }
+    }
+    /** Pressing Enter (onOK) in the search field behaves like clicking the search button. */
+    @Listen("onOK = #searchString")
+    public void onEnterSearchString(Event ev) {
+        onExecuteSearch(ev);
+    }
+    /** Delegates a click on #clearResults to the search-results composer's clearResults(). */
+    @Listen("onClick=#clearResults")
+    public void onClearResults(Event ev) {
+        this.searchResultsComposer.clearResults();
+    }
+    /** Delegates a click on #downloadCSV to the search-results composer's exportCSV(). */
+    @Listen("onClick=#downloadCSV")
+    public void onExportResultsCSV(Event ev) {
+        searchResultsComposer.exportCSV();
+    }
+    /** Delegates a click on #downloadTCF to the search-results composer's exportTCF(). */
+    @Listen("onClick=#downloadTCF")
+    public void onExportResultsTCF(Event ev) {
+        searchResultsComposer.exportTCF();
+    }
+    /** Delegates a click on #downloadText to the search-results composer's exportText(). */
+    @Listen("onClick=#downloadText")
+    public void onExportResultsText(Event ev) {
+        searchResultsComposer.exportText();
+    }
+    /** Delegates a click on #downloadExcel to the search-results composer's exportExcel(). */
+    @Listen("onClick=#downloadExcel")
+    public void onExportResultsExcel(Event ev) {
+        searchResultsComposer.exportExcel();
+    }
+    /**
+     * Remembers CSV (code 1) as the pending export type and opens the sign-in
+     * popup; the export itself is triggered from onSignInExportResults.
+     */
+    @Listen("onClick=#exportPWCSV")
+    public void onExportResultsPWCSV(Event ev) {
+        exportDataType = 1;
+        wspaceSigninpop.open(srDiv, "top_center");
+    }
+    /**
+     * Remembers TCF (code 0) as the pending export type and opens the sign-in
+     * popup; the export itself is triggered from onSignInExportResults.
+     */
+    @Listen("onClick=#exportPWTCF")
+    public void onExportResultsPWTCF(Event ev) {
+        exportDataType = 0;
+        wspaceSigninpop.open(srDiv, "top_center");
+    }
+    /**
+     * Remembers plain text (code 2) as the pending export type and opens the
+     * sign-in popup; the export itself is triggered from onSignInExportResults.
+     */
+    @Listen("onClick=#exportPWText")
+    public void onExportResultsPWText(Event ev) {
+        exportDataType = 2;
+        wspaceSigninpop.open(srDiv, "top_center");
+    }
+    /**
+     * Remembers Excel (code 3) as the pending export type and opens the sign-in
+     * popup; the export itself is triggered from onSignInExportResults.
+     */
+    @Listen("onClick=#exportPWExcel")
+    public void onExportResultsPWExcel(Event ev) {
+        exportDataType = 3;
+        wspaceSigninpop.open(srDiv, "top_center");
+    }
+    @Listen("onClick=#weblichtText")
+    public void onUseWebLichtOnText(Event ev) {
+        String url = searchResultsComposer.useWebLichtOnText();
+        if (url != null) {
+            Executions.getCurrent().sendRedirect(weblichtUrl
+                + url, "_blank");
+        }
+    }
+    @Listen("onClick=#weblichtTcf")
+    public void onUseWebLichtOnTcf(Event ev) {
+        String url = searchResultsComposer.useWebLichtOnToks();
+        if (url != null) {
+            Executions.getCurrent().sendRedirect(weblichtUrl
+                + url, "_blank");
+        }
+    }
+    @Listen("onClick=#wspaceSigninBtn")
+    public void onSignInExportResults(Event ev) {
+        String user = wspaceUserName.getValue();
+        String pswd = wspaceUserPwd.getValue();
+        wspaceUserPwd.setValue("");
+        if (user.isEmpty() || pswd.isEmpty()) {
+            Messagebox.show("Need user name and password!");
+        } else {
+            wspaceSigninpop.close();
+            if (exportDataType == 0) {
+                searchResultsComposer.exportPWTCF(user, pswd);
+            } else if (exportDataType == 1) {
+                searchResultsComposer.exportPWCSV(user, pswd);
+            } else if (exportDataType == 2) {
+                searchResultsComposer.exportPWText(user, pswd);
+            } else if (exportDataType == 3) {
+                searchResultsComposer.exportPWExcel(user, pswd);
+            }
+        }
+    }
+    /** Pressing Enter (onOK) in the password field behaves like clicking the sign-in button. */
+    @Listen("onOK=#wspaceUserPwd")
+    public void onSignInExportResultsPwdOK(Event ev) {
+        onSignInExportResults(ev);
+    }
+    /** Cancels the sign-in: clears the password field and closes the popup. */
+    @Listen("onClick=#wspaceCancelBtn")
+    public void onSignInPWCancel(Event ev) {
+        wspaceUserPwd.setValue("");
+        wspaceSigninpop.close();
+    }
+    /** Opens the help page and disables both control bars. */
+    @Listen("onClick = #helpLabel")
+    public void onClickHelp(Event ev) {
+        this.pagesVisibility.openHelp();
+        this.controlsVisibility.disableControls1();
+        this.controlsVisibility.disableControls2();
+    }
+    /** Opens the about page and disables both control bars. */
+    @Listen("onClick = #aboutLabel")
+    public void onClickAbout(Event ev) {
+        this.pagesVisibility.openAbout();
+        this.controlsVisibility.disableControls1();
+        this.controlsVisibility.disableControls2();
+    }
+    /** Opens the search-options page and disables both control bars. */
+    @Listen("onClick = #soLabel")
+    public void onClickAdvSearch(Event ev) {
+        this.pagesVisibility.openSearchOptions();
+        this.controlsVisibility.disableControls1();
+        this.controlsVisibility.disableControls2();
+    }
+    /**
+     * Opens the search-results page. Refreshes the prev/next tooltips first,
+     * then enables the second control bar while a search is in progress and
+     * both control bars once results are available. Also called directly with
+     * a null event by the search handlers.
+     *
+     * @param ev the triggering event (not used; may be null)
+     */
+    @Listen("onClick = #srLabel")
+    public void onClickSearchResult(Event ev) {
+        setupPrevNextSearchTooltips();
+        this.pagesVisibility.openSearchResult();
+        if (this.searchResultsComposer.hasSearchInProgress()) {
+            this.controlsVisibility.enableControls2();
+        }
+        if (this.searchResultsComposer.hasResults()) {
+            this.controlsVisibility.enableControls1();
+            this.controlsVisibility.enableControls2();
+        }
+    }
+    @Listen("onClick = #prevButton")
+    public void onSearchPrev(Event ev) {
+        Map<String, Set<Corpus>> selectedCorpora = searchOptionsComposer.getSelectedCorpora();
+        boolean emptyCorpora = true;
+        for (Set<Corpus> corpora : selectedCorpora.values()) {
+            if (!corpora.isEmpty()) {
+                emptyCorpora = false;
+                break;
+            }
+        }
+        if (emptyCorpora) {
+            Messagebox.show("No corpora is selected. To perform the search, please select corus/corpora of interest by checking the corpora checkboxes.", "FCS", 0, Messagebox.INFORMATION);
+        } else if (searchString.getText().isEmpty()) {
+            Messagebox.show("No query is specified. To perform the search, please enter a keyword of interest in the search input field, e.g. Elefant, and press the 'Search' button.", "FCS", 0, Messagebox.INFORMATION);
+        } else {
+            int maxRecords = searchOptionsComposer.getMaxRecords();
+            String searchLang = searchOptionsComposer.getSearchLang();
+            //searchOffset[0] = searchOffset[0] - searchOffset[1];
+            searchOffset[0] = searchOffset[0] - maxRecords;
+            if (searchOffset[0] < 1) {
+                searchOffset[0] = 1;
+            }
+            searchOffset[1] = maxRecords;
+            searchResultsComposer.executeSearch(selectedCorpora, searchOffset[0], maxRecords, searchString.getText(), searchLang);
+            if (searchLang.equals(Languages.ANY_LANGUAGE_NAME)) {
+                this.weblichtTcf.setVisible(false);
+            } else {
+                this.weblichtTcf.setVisible(true);
+            }
+            onClickSearchResult(null);
+        }
+    }
+    @Listen("onClick = #nextButton")
+    public void onSearchNext(Event ev) {
+        Map<String, Set<Corpus>> selectedCorpora = searchOptionsComposer.getSelectedCorpora();
+        boolean emptyCorpora = true;
+        for (Set<Corpus> corpora : selectedCorpora.values()) {
+            if (!corpora.isEmpty()) {
+                emptyCorpora = false;
+                break;
+            }
+        }
+        if (emptyCorpora) {
+            Messagebox.show("No corpora is selected. To perform the search, please select corus/corpora of interest by checking the corpora checkboxes.", "FCS", 0, Messagebox.INFORMATION);
+        } else if (searchString.getText().isEmpty()) {
+            Messagebox.show("No query is specified. To perform the search, please enter a keyword of interest in the search input field, e.g. Elefant, and press the 'Search' button.", "FCS", 0, Messagebox.INFORMATION);
+        } else {
+            int maxRecords = searchOptionsComposer.getMaxRecords();
+            String searchLang = searchOptionsComposer.getSearchLang();
+            searchOffset[0] = searchOffset[0] + searchOffset[1];
+            searchOffset[1] = maxRecords;
+            searchResultsComposer.executeSearch(selectedCorpora, searchOffset[0], maxRecords, searchString.getText(), searchLang);
+            if (searchLang.equals(Languages.ANY_LANGUAGE_NAME)) {
+                this.weblichtTcf.setVisible(false);
+            } else {
+                this.weblichtTcf.setVisible(true);
+            }
+            onClickSearchResult(null);
+        }
+    }
+    /**
+     * Applies the request parameters of the current execution: a query
+     * parameter pre-fills the search field, and an operation parameter other
+     * than searchRetrieve is reported in an information box. Both received
+     * values are logged (null when absent).
+     */
+    private void processParameters() {
+        Map<String, String[]> requestParams = Executions.getCurrent().getParameterMap();
+
+        String query = null;
+        String[] queryValues = requestParams.get(SRUCQL.SEARCH_QUERY_PARAMETER);
+        if (queryValues != null) {
+            query = queryValues[0].trim();
+            searchString.setValue(query);
+        }
+        LOGGER.log(Level.INFO, "Received parameter: query[{0}], ", query);
+
+        String operationString = null;
+        String[] operationValues = requestParams.get(SRUCQL.OPERATION);
+        if (operationValues != null) {
+            operationString = operationValues[0].trim();
+            if (!operationString.equals(SRUCQL.SEARCH_RETRIEVE)) {
+                Messagebox.show("Not supported operation " + operationString, "FCS", 0, Messagebox.INFORMATION);
+            }
+        }
+        LOGGER.log(Level.INFO, "Received parameter: operation[{0}], ", operationString);
+    }
+    /**
+     * Updates the tooltips of the prev/next buttons with the hit ranges those
+     * buttons would show, derived from the current search offset and the
+     * configured page size.
+     */
+    private void setupPrevNextSearchTooltips() {
+        // previous page: one page size before the current start, never below hit 1
+        int prevFirst = Math.max(1, searchOffset[0] - searchOptionsComposer.getMaxRecords());
+        int prevLast = searchOffset[0] - 1;
+        tooltipPrevText.setValue("hits " + prevFirst + "-" + prevLast);
+
+        // next page: begins right after the page that was fetched last
+        int nextFirst = searchOffset[0] + searchOffset[1];
+        int nextLast = nextFirst + searchOptionsComposer.getMaxRecords() - 1;
+        tooltipNextText.setValue("hits " + nextFirst + "-" + nextLast);
+    }
+    /**
+     * Reads the WebLicht base URL from the naming context entry
+     * "java:comp/env/weblicht-url". A failed lookup is logged and leaves
+     * weblichtUrl unchanged.
+     */
+    private void processContext() {
+        try {
+            weblichtUrl = (String) new InitialContext().lookup("java:comp/env/weblicht-url");
+        } catch (NamingException ex) {
+            LOGGER.log(Level.SEVERE, null, ex);
+        }
+    }
+public class Aggregator implements ServletContextListener {
+        private static final Logger LOGGER = Logger.getLogger(Aggregator.class.getName());
+        public static final int WAITING_TIME_FOR_SHUTDOWN_MS = 10000;
+        public static final String DE_TOK_MODEL = "/tokenizer/de-tuebadz-8.0-token.bin";
+        private static final String DEFAULT_DATA_LOCATION = "/data";
+        private static final String SCAN_DIR_NAME = "scan";
+        private static final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
+        private static Aggregator instance;
+        private AtomicReference<ScanCache> scanCacheAtom = new AtomicReference<ScanCache>();
+        private TokenizerModel model;
+        private SRUThreadedClient sruClient = null;
+        private Map<Long, Search> activeSearches = Collections.synchronizedMap(new HashMap<Long, Search>());
+        public static Aggregator getInstance() {
+                return instance;
+        }
+        public ScanCache getScanCache() {
+                return scanCacheAtom.get();
+        }
+        @Override
+        public void contextInitialized(ServletContextEvent servletContextEvent) {
+                LOGGER.info("Aggregator is starting now.");
+                instance = this;
+                try {
+                        sruClient = new SRUThreadedClient();
+                        sruClient.registerRecordParser(new ClarinFCSRecordParser());
+                        InitialContext context = new InitialContext();
+                        Integer cacheMaxDepth = (Integer) context.lookup("java:comp/env/scan-max-depth");
+                        EndpointUrlFilter filter //= null;
+                                        = new EndpointUrlFilter("uni-tuebingen.de", ".mpi.nl", "dspin.dwds.de", "lindat.");
+                        ScanCrawler scanCrawler = new ScanCrawler(new CenterRegistryLive(), sruClient, filter, cacheMaxDepth);
+                        ScanCacheFile scanCacheFile = new ScanCacheFile(getScanDirectory());
+                        LOGGER.info("Start cache read");
+                        try {
+                                scanCacheAtom.set(scanCacheFile.read());
+                                LOGGER.info("Finished cache read, number of root corpora: " + scanCacheAtom.get().getRootCorpora().size());
+                        } catch (Exception e) {
+                                LOGGER.log(Level.SEVERE, "Error while reading the scan cache!", e);
+                                scanCacheAtom.set(new SimpleInMemScanCache());
+                        }
+                        String updateIntervalUnitString = (String) context.lookup("java:comp/env/update-interval-unit");
+                        TimeUnit cacheUpdateIntervalUnit = TimeUnit.valueOf(updateIntervalUnitString);
+                        Integer cacheUpdateInterval = (Integer) context.lookup("java:comp/env/update-interval");
+                        scheduler.scheduleAtFixedRate(
+                                        new ScanCrawlTask(scanCrawler, scanCacheFile, scanCacheAtom),
+, cacheUpdateInterval, cacheUpdateIntervalUnit);
+                        model = setUpTokenizers();
+                        LOGGER.info("Aggregator initialization finished.");
+                } catch (Exception ex) {
+                        LOGGER.log(Level.SEVERE, null, ex);
+                        instance = null; // force crash
+                }
+        }
+        @Override
+        public void contextDestroyed(ServletContextEvent sce) {
+                LOGGER.info("Aggregator is shutting down.");
+                for (Search search : activeSearches.values()) {
+                        search.shutdown();
+                }
+                shutdownAndAwaitTermination(sruClient, scheduler);
+                LOGGER.info("Aggregator shutdown complete.");
+        }
+        public static SRUVersion getSRUVersion(String sruversion) {
+                SRUVersion version = SRUVersion.VERSION_1_2;
+                if (sruversion.equals("1.2")) {
+                        version = SRUVersion.VERSION_1_2;
+                } else if (sruversion.equals("1.1")) {
+                        version = SRUVersion.VERSION_1_1;
+                } else {
+                        return null;
+                }
+                return version;
+        }
+        // this function should be thread-safe
+        public Search startSearch(SRUVersion version, List<Corpus> corpora, String searchString, String searchLang, int maxRecords) throws Exception {
+                if (corpora.isEmpty()) {
+                        // No corpora
+                        return null;
+                } else if (searchString.isEmpty()) {
+                        // No query
+                        return null;
+                } else {
+                        Search sr = new Search(sruClient, version, corpora, searchString, searchLang, 1, maxRecords);
+                        activeSearches.put(sr.getId(), sr);
+                        return sr;
+                }
+        }
+        public Search getSearchById(Long id) {
+                return activeSearches.get(id);
+        }
+        private static String getScanDirectory() throws NamingException {
+                InitialContext context = new InitialContext();
+                String dataLocationPropertyName = (String) context.lookup("java:comp/env/data-location-property");
+                String aggregatorDirName = (String) context.lookup("java:comp/env/aggregator-folder");
+                // see if data location is set in properties
+                String dataLocation = System.getProperty(dataLocationPropertyName);
+                if (dataLocation == null || !(new File(dataLocation, aggregatorDirName).exists())) {
+                        dataLocation = DEFAULT_DATA_LOCATION;
+                        if (!(new File(dataLocation, aggregatorDirName).exists())) {
+                                dataLocation = System.getProperty("user.home");
+                        }
+                        if ((new File(dataLocation, aggregatorDirName).exists())) {
+                                LOGGER.info(dataLocationPropertyName + " property is not defined, "
+                                                + "setting to default: " + dataLocation);
+                        } else {
+                                LOGGER.info(dataLocationPropertyName + " property is not defined, "
+                                                + "default location does not exist: " + dataLocation);
+                                throw new RuntimeException("Data location not found");
+                        }
+                }
+                File aggregatorDir = new File(dataLocation, aggregatorDirName);
+                if (!aggregatorDir.exists()) {
+                        LOGGER.severe("Aggregator directory does not exist: "
+                                        + aggregatorDir.getAbsolutePath());
+                }
+                File scanDir = new File(aggregatorDir, SCAN_DIR_NAME);
+                if (!scanDir.exists()) {
+                        if (!scanDir.mkdir()) {
+                                LOGGER.severe("Scan directory does not exist and cannot be created: "
+                                                + aggregatorDir.getAbsolutePath());
+                        }
+                }
+                String scanPath = scanDir.getAbsolutePath();
+                LOGGER.info("Scan data location: " + scanPath);
+                return scanPath;
+        }
+        private static void shutdownAndAwaitTermination(SRUThreadedClient sruClient, ExecutorService scheduler) {
+                try {
+                        sruClient.shutdown();
+                        scheduler.shutdown();
+                        Thread.sleep(WAITING_TIME_FOR_SHUTDOWN_MS);
+                        sruClient.shutdownNow();
+                        scheduler.shutdownNow();
+                        Thread.sleep(WAITING_TIME_FOR_SHUTDOWN_MS);
+                } catch (InterruptedException ie) {
+                        sruClient.shutdownNow();
+                        scheduler.shutdownNow();
+                        Thread.currentThread().interrupt();
+                }
+        }
+        private static TokenizerModel setUpTokenizers() {
+                TokenizerModel model = null;
+                try {
+                        InputStream tokenizerModelDeAsIS = Thread.currentThread().getContextClassLoader().getResourceAsStream(DE_TOK_MODEL);
+                        model = new TokenizerModel(tokenizerModelDeAsIS);
+                        tokenizerModelDeAsIS.close();
+                } catch (IOException ex) {
+                        LOGGER.log(Level.SEVERE, "Failed to load tokenizer model", ex);
+                }
+                return model;
+        }
+}

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/EndpointFilter.java

r5035	r5720
1	1	package eu.clarin.sru.fcs.aggregator.cache;
2	2
3		import eu.clarin.sru.fcs.aggregator.~~sopt~~.Endpoint;
	3	import eu.clarin.sru.fcs.aggregator.registry.Endpoint;
4	4
5	5	/**

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/EndpointUrlFilter.java

-                      r5035
+                      r5720
 package eu.clarin.sru.fcs.aggregator.cache;
 import eu.clarin.sru.fcs.aggregator.sopt.Endpoint;
+import eu.clarin.sru.fcs.aggregator.registry.Endpoint;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 /**
  * Filters for the cache of scan data (endpoint/resources descriptions) based
  * on endpoint url. Only endpoints containing one of the specified string in the
+ * Filters for the cache of scan data (endpoint/resources descriptions) based on
+ * endpoint url. Only endpoints containing one of the specified string in the
  * endpoint url will be cached. Useful for testing the endpoints.
+ *
+ *
  * @author yanapanchenko
  */
 public class EndpointUrlFilter implements EndpointFilter {
-    private String[] urlShouldContain = new String[0];
-    public void urlShouldContainAnyOf(String ... urlSubstrings) {
-        urlShouldContain = urlSubstrings;
+    }
+    /**
+     * Keeps the endpoints whose url contains at least one of the configured
+     * substrings; each endpoint is added at most once.
+     *
+     * @param endpoints the endpoints to filter
+     * @return a new list with the matching endpoints, in iteration order
+     */
+    @Override
+    public Iterable<Endpoint> filter(Iterable<Endpoint> endpoints) {
+        List<Endpoint> filtered = new ArrayList<Endpoint>();
+        for (Endpoint endp : endpoints) {
+            for (String urlSubstring : urlShouldContain) {
+                if (endp.getUrl().contains(urlSubstring)) {
+                    filtered.add(endp);
+                    // one match is enough; avoid adding the endpoint twice
+                    break;
+                }
+            }
+        }
+        return filtered;
+    }
+        private List<String> allow = new ArrayList<String>();
+        /**
+         * Creates a filter that accepts endpoints whose url contains any of
+         * the given fragments.
+         *
+         * @param fragments url substrings to allow
+         */
+        public EndpointUrlFilter(String... fragments) {
+                Collections.addAll(allow, fragments);
+        }
+        @Override
+        public Iterable<Endpoint> filter(Iterable<Endpoint> endpoints) {
+                List<Endpoint> filtered = new ArrayList<Endpoint>();
+                for (Endpoint endp : endpoints) {
+                        for (String urlSubstring : allow) {
+                                if (endp.getUrl().contains(urlSubstring)) {
+                                        filtered.add(endp);
+                                        break;
+                                }
+                        }
+                }
+                return filtered;
+        }
+}

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCache.java

-                      r5035
+                      r5720
 package eu.clarin.sru.fcs.aggregator.cache;
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
 import eu.clarin.sru.fcs.aggregator.sopt.Institution;
+import eu.clarin.sru.fcs.aggregator.registry.Corpus;
+import eu.clarin.sru.fcs.aggregator.registry.Institution;
 import java.util.List;
 import java.util.Map;

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCacheFile.java

-                      r5701
+                      r5720
 package eu.clarin.sru.fcs.aggregator.cache;
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
 import eu.clarin.sru.fcs.aggregator.sopt.Endpoint;
 import eu.clarin.sru.fcs.aggregator.sopt.Institution;
+import eu.clarin.sru.fcs.aggregator.registry.Corpus;
+import eu.clarin.sru.fcs.aggregator.registry.Endpoint;
+import eu.clarin.sru.fcs.aggregator.registry.Institution;
 import java.io.BufferedOutputStream;
 import java.io.BufferedReader;
 …
  * @author yanapanchenko
  */
 public class ScanCacheFiled {
+public class ScanCacheFile {
     private String scanDirectory;
 …
     public static final String NL = "\n";
     public static final String SPACE = " ";
     private static final Logger LOGGER = Logger.getLogger(ScanCacheFiled.class.getName());
+    private static final Logger LOGGER = Logger.getLogger(ScanCacheFile.class.getName());
     /**
 …
      * ScanCache data are/should be stored.
      */
     public ScanCacheFiled(String scanDirectory) {
+    public ScanCacheFile(String scanDirectory) {
         this.scanDirectory = scanDirectory;
+    }

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCrawlTask.java

-                      r5035
+                      r5720
 package eu.clarin.sru.fcs.aggregator.cache;
 import static eu.clarin.sru.fcs.aggregator.app.WebAppListener.CORPUS_CACHE;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import org.zkoss.zk.ui.WebApp;
 /**
 …
     private final ScanCrawler scanCrawler;
     private ScanCacheFiled scanCacheFiled;
     private WebApp webapp;
+        private ScanCacheFile scanCacheFiled;
+        private AtomicReference<ScanCache> scanCacheAtom;
+    public ScanCrawlTask(
+            ScanCrawler scanCrawler, ScanCacheFiled scanCacheFiled, WebApp webapp) {
+        public ScanCrawlTask(ScanCrawler scanCrawler, ScanCacheFile scanCacheFiled, AtomicReference<ScanCache> scanCacheAtom) {
         this.scanCrawler = scanCrawler;
         this.scanCacheFiled = scanCacheFiled;
         this.webapp = webapp;
+                this.scanCacheFiled = scanCacheFiled;
+                this.scanCacheAtom = scanCacheAtom;
+    }
 …
             } else {
                 logger.log(Level.INFO, "Started cache write into the file");
                 scanCacheFiled.write(cacheNew);
                 webapp.setAttribute(CORPUS_CACHE, cacheNew);
+                                scanCacheFiled.write(cacheNew);
+                                scanCacheAtom.set(cacheNew);
                 logger.log(Level.INFO, "Finished cache write into the file");
+            }

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/ScanCrawler.java

-                      r5039
+                      r5720
 package eu.clarin.sru.fcs.aggregator.cache;
-import eu.clarin.sru.client.SRUClientException;
 import eu.clarin.sru.client.SRUScanRequest;
 import eu.clarin.sru.client.SRUScanResponse;
 import eu.clarin.sru.client.SRUTerm;
 import eu.clarin.sru.client.SRUThreadedClient;
 import eu.clarin.sru.fcs.aggregator.sopt.CenterRegistryI;
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
 import eu.clarin.sru.fcs.aggregator.sopt.Endpoint;
 import eu.clarin.sru.fcs.aggregator.sopt.Institution;
+import eu.clarin.sru.fcs.aggregator.registry.CenterRegistryI;
+import eu.clarin.sru.fcs.aggregator.registry.Corpus;
+import eu.clarin.sru.fcs.aggregator.registry.Endpoint;
+import eu.clarin.sru.fcs.aggregator.registry.Institution;
 import eu.clarin.sru.fcs.aggregator.util.SRUCQL;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 …
 /**
  * Crawler for collecting endpoint scan operation responses of FCS specification.
  * Collects all the endpoints and resources descriptions.
+ *
+ * Crawler for collecting endpoint scan operation responses of FCS
+ * specification. Collects all the endpoints and resources descriptions.
+ *
  * @author yanapanchenko
  */
 public class ScanCrawler {
+    private static final Logger LOGGER = Logger.getLogger(ScanCrawler.class.getName());
+    private CenterRegistryI cr;
+    private SRUThreadedClient sruScanClient;
+    private int maxDepth = 1;
+    private EndpointFilter filter = null;
+    /**
+     * Creates a crawler with the default maximum depth of 1 and no endpoint filter.
+     *
+     * @param centerRegistry registry that supplies the institutions to crawl
+     * @param sruScanClient client used for the scan requests
+     */
+    public ScanCrawler(CenterRegistryI centerRegistry, SRUThreadedClient sruScanClient) {
+        cr = centerRegistry;
+        this.sruScanClient = sruScanClient;
+    }
+    /**
+     * Creates a crawler with an explicit endpoint filter and maximum depth.
+     *
+     * @param centerRegistry registry that supplies the institutions to crawl
+     * @param sruScanClient client used for the scan requests
+     * @param filter restricts which endpoints are crawled; null crawls all of them
+     * @param maxDepth maximum number of corpus levels to descend per endpoint
+     */
+    public ScanCrawler(CenterRegistryI centerRegistry, SRUThreadedClient sruScanClient, EndpointFilter filter, int maxDepth) {
+        this(centerRegistry, sruScanClient);
+        this.maxDepth = maxDepth;
+        this.filter = filter;
+    }
+    /**
+     * Crawler of scan operation of FCS specification. Collects all the endpoints
+     * and resources descriptions into the provided cache.
+     *
+     * @param cache cache into which the endpoints and resources descriptions
+     * from scan operation responses should be collected.
+     */
+    public void crawl(ScanCache cache) {
+        //TODO remember not responding root corpora and come back to them later... ?
+        List<Institution> institutions = cr.getCQLInstitutions();
+        //LOGGER.info(institutions.toString());
+        for (Institution institution : institutions) {
+            cache.addInstitution(institution);
+            Iterable<Endpoint> endpoints = institution.getEndpoints();
+            if (filter != null) {
+                endpoints = filter.filter(endpoints);
+            }
+            for (Endpoint endp : endpoints) {
+                Corpus parentCorpus = null;// i.e. it's root
+                addCorpora(sruScanClient, endp.getUrl(), institution, 0, parentCorpus, cache);
+            }
+        }
+    }
+    private void addCorpora(SRUThreadedClient sruScanClient, String endpointUrl,
+            Institution institution, int depth, Corpus parentCorpus, ScanCache cache) {
+        depth++;
+        if (depth > maxDepth) {
+            return;
+        }
+            List<Corpus> childrenCorpora = doScan(sruScanClient, endpointUrl, institution, parentCorpus);
+            //if (childrenCorpora.isEmpty()) {
+                for (Corpus c : childrenCorpora) {
+                    // don't add corpus that introduces cyclic references
+                    // as of March 2014, there are 2 such endpoints...
+                    if (cache.getCorpus(c.getHandle())!= null) {
+                        LOGGER.warning("Cyclic reference in corpus " + c.getHandle() + " of endpoint " + endpointUrl);
+                        continue;
+                    }
+                    //Corpus c = createCorpus(institution, endpointUrl, term);
+                    //Corpus c = new Corpus(institution, endpointUrl);
+//                    c.setHandle(term.getValue());
+//                    c.setDisplayName(term.getDisplayTerm());
+//                    if (term.getNumberOfRecords() > 0) {
+//                        c.setNumberOfRecords(term.getNumberOfRecords());
+//                    }
+//                    addExtraInfo(c, term);
+                    cache.addCorpus(c, parentCorpus);
+                    // how not to query default corpus???
+                    addCorpora(sruScanClient, c.getEndpointUrl(), c.getInstitution(),
+                            depth, c, cache);
+                }
+                //} else if () {
+                // TODO if diagnistics came back, try simple scan without the
+                // SRUCQLscan.RESOURCE_INFO_PARAMETER
+//            }
+//else {
+//                if (parentCorpus == null) { // means root
+                    // create default root corpus:
+//                    Corpus c = new Corpus(institution, endpointUrl);
+//                    cache.addCorpus(c);
+//                }
+//            }
+//            SRUScanResponse response = doScan(sruScanClient, endpointUrl, parentCorpus);
+//
+//
+//            if (response != null && response.hasTerms()) {
+//                for (SRUTerm term : response.getTerms()) {
+//                    // don't add corpus that introduces cyclic references
+//                    // as of March 2014, there are 2 such endpoints...
+//                    if (cache.getCorpus(term.getValue())!= null) {
+//                        LOGGER.warning("Cyclic reference in corpus " + term.getValue() + " of endpoint " + endpointUrl);
+//                        continue;
+//                    }
+//                    Corpus c = createCorpus(institution, endpointUrl, term);
+//                    //Corpus c = new Corpus(institution, endpointUrl);
+////                    c.setHandle(term.getValue());
+////                    c.setDisplayName(term.getDisplayTerm());
+////                    if (term.getNumberOfRecords() > 0) {
+////                        c.setNumberOfRecords(term.getNumberOfRecords());
+////                    }
+////                    addExtraInfo(c, term);
+//                    cache.addCorpus(c, parentCorpus);
+//                    addCorpora(sruScanClient, c.getEndpointUrl(), c.getInstitution(),
+//                            depth, c, cache);
+//                }
+//                //} else if () {
+//                // TODO if diagnostics came back, try simple scan without the
+//                // SRUCQLscan.RESOURCE_INFO_PARAMETER
+//            } else {
+//                if (parentCorpus == null) { // means root
+//                    // create default root corpus:
+//                    Corpus c = new Corpus(institution, endpointUrl);
+//                    cache.addCorpus(c);
+//                }
+//            }
+    }
+    private static String normalizeHandle(Corpus corpus, boolean root) {
+        if (root) {
+            return Corpus.ROOT_HANDLE;
+        }
+        String handle = corpus.getHandle();
+        if (Corpus.HANDLE_WITH_SPECIAL_CHARS.matcher(handle).matches()) {
+            //resourceValue = "%22" + resourceValue + "%22";
+            handle = "\"" + handle + "\"";
+        }
+        return handle;
+    }
+//    public static SRUScanResponse doScan(SRUThreadedClient sruScanClient,
+//            String endpointUrl, Corpus parentCorpus) {
+//
+//        Future<SRUScanResponse> corporaResponse = null;
+//        SRUScanResponse response = null;
+//        try {
+//                    SRUScanRequest corporaRequest = new SRUScanRequest(endpointUrl);
+//            StringBuilder scanClause = new StringBuilder(SRUCQL.SCAN_RESOURCE_PARAMETER);
+//            scanClause.append("=");
+//            String normalizedHandle = normalizeHandle(parentCorpus, parentCorpus == null);
+//            scanClause.append(normalizedHandle);
+//            corporaRequest.setScanClause(scanClause.toString());
+//            corporaRequest.setExtraRequestData(SRUCQL.SCAN_RESOURCE_INFO_PARAMETER,
+//                    SRUCQL.SCAN_RESOURCE_INFO_PARAMETER_DEFAULT_VALUE);
+//            corporaResponse = sruScanClient.scan(corporaRequest);
+//            Thread.sleep(5000);
+//            response = corporaResponse.get(600, TimeUnit.SECONDS);
+//            return response;
+//        }    catch (TimeoutException ex) {
+//            LOGGER.log(Level.SEVERE, "Timeout scanning corpora {0} at {1} {2} {3}",
+//                    new String[]{Corpus.ROOT_HANDLE, endpointUrl, ex.getClass().getName(), ex.getMessage()});
+//        } catch (Exception ex) {
+//            LOGGER.log(Level.SEVERE, "Error accessing corpora {0} at {1} {2} {3}",
+//                    new String[]{Corpus.ROOT_HANDLE, endpointUrl, ex.getClass().getName(), ex.getMessage()});
+//        } finally {
+//            if (corporaResponse != null && !corporaResponse.isDone()) {
+//                corporaResponse.cancel(true);
+//            }
+//        }
+//
+//        return response;
+//    }
+        public static List<Corpus> doScan(SRUThreadedClient sruScanClient,
+            String endpointUrl, Institution institution, Corpus parentCorpus) {
+        List<Corpus> corpora = new ArrayList<Corpus>();
+        Future<SRUScanResponse> corporaResponse = null;
+        SRUScanResponse response = null;
+        try {
+                    SRUScanRequest corporaRequest = new SRUScanRequest(endpointUrl);
+            StringBuilder scanClause = new StringBuilder(SRUCQL.SCAN_RESOURCE_PARAMETER);
+            scanClause.append("=");
+            String normalizedHandle = normalizeHandle(parentCorpus, parentCorpus == null);
+            scanClause.append(normalizedHandle);
+            corporaRequest.setScanClause(scanClause.toString());
+            corporaRequest.setExtraRequestData(SRUCQL.SCAN_RESOURCE_INFO_PARAMETER,
+                    SRUCQL.SCAN_RESOURCE_INFO_PARAMETER_DEFAULT_VALUE);
+            corporaResponse = sruScanClient.scan(corporaRequest);
+            Thread.sleep(5000);
+            response = corporaResponse.get(600, TimeUnit.SECONDS);
+        }    catch (TimeoutException ex) {
+            LOGGER.log(Level.SEVERE, "Timeout scanning corpora {0} at {1} {2} {3}",
+                    new String[]{Corpus.ROOT_HANDLE, endpointUrl, ex.getClass().getName(), ex.getMessage()});
+        } catch (Exception ex) {
+            LOGGER.log(Level.SEVERE, "Error accessing corpora {0} at {1} {2} {3}",
+                    new String[]{Corpus.ROOT_HANDLE, endpointUrl, ex.getClass().getName(), ex.getMessage()});
+        } finally {
+            if (corporaResponse != null && !corporaResponse.isDone()) {
+                corporaResponse.cancel(true);
+            }
+        }
+        if (response != null && response.hasTerms()) {
+                for (SRUTerm term : response.getTerms()) {
+                    // don't add corpus that introduces cyclic references
+                    // as of March 2014, there are 2 such endpoints...
+                    //if (cache.getCorpus(term.getValue())!= null) {
+                    //    LOGGER.warning("Cyclic reference in corpus " + term.getValue() + " of endpoint " + endpointUrl);
+                    //    continue;
+                    //}
+                    Corpus c = createCorpus(institution, endpointUrl, term);
+                    corpora.add(c);
+                }
+                //} else if () {
+                // TODO if diagnistics came back, try simple scan without the
+                // SRUCQLscan.RESOURCE_INFO_PARAMETER
+            } else {
+                if (parentCorpus == null) { // means root
+                    // create default root corpus:
+                    Corpus c = new Corpus(institution, endpointUrl);
+                    corpora.add(c);
+                }
+            }
+        return corpora;
+    }
+    private static Corpus createCorpus(Institution institution, String endpointUrl, SRUTerm term) {
+                    Corpus c = new Corpus(institution, endpointUrl);
+                    c.setHandle(term.getValue());
+                    c.setDisplayName(term.getDisplayTerm());
+                    if (term.getNumberOfRecords() > 0) {
+                        c.setNumberOfRecords(term.getNumberOfRecords());
+                    }
+                    addExtraInfo(c, term);
+                    return c;
+    }
+        // TODO: ask Oliver to add API support for the extra info in the
+    // SRU client/server libraries, so that it's not necessary to work
+    // with DocumentFragment
+    /**
+     * Copies landing page, languages and description from the extra term data
+     * of a scan term into the corpus.
+     * <p>
+     * Nothing is changed when the term has no extra data or the fragment
+     * contains no element. When several descriptions are present, the one
+     * marked {@code xml:lang="en"} wins.
+     *
+     * @param c corpus to update
+     * @param term scan term whose extra term data is read
+     */
+    private static void addExtraInfo(Corpus c, SRUTerm term) {
+        DocumentFragment extraInfo = term.getExtraTermData();
+        if (extraInfo == null) {
+            return;
+        }
+        // The info elements sit inside one wrapper element. Skip leading
+        // non-element nodes so that an empty fragment does not cause a
+        // NullPointerException and a whitespace-led one is still read.
+        Node wrapper = extraInfo.getFirstChild();
+        while (wrapper != null && wrapper.getNodeType() != Node.ELEMENT_NODE) {
+            wrapper = wrapper.getNextSibling();
+        }
+        if (wrapper == null) {
+            return;
+        }
+        String enDescription = null;
+        NodeList infoNodes = wrapper.getChildNodes();
+        for (int i = 0; i < infoNodes.getLength(); i++) {
+            Node infoNode = infoNodes.item(i);
+            if (infoNode.getNodeType() != Node.ELEMENT_NODE) {
+                continue;
+            }
+            // getLocalName() can return null, hence the constant-first equals.
+            String name = infoNode.getLocalName();
+            if ("LandingPageURI".equals(name)) {
+                c.setLandingPage(infoNode.getTextContent().trim());
+            } else if ("Languages".equals(name)) {
+                NodeList languageNodes = infoNode.getChildNodes();
+                for (int j = 0; j < languageNodes.getLength(); j++) {
+                    Node languageNode = languageNodes.item(j);
+                    if (languageNode.getNodeType() == Node.ELEMENT_NODE
+                            && "Language".equals(languageNode.getLocalName())) {
+                        String languageText = languageNode.getTextContent().trim();
+                        if (!languageText.isEmpty()) {
+                            c.addLanguage(languageText);
+                        }
+                    }
+                }
+            } else if ("Description".equals(name)) {
+                Element element = (Element) infoNode;
+                // turn escaped and literal line breaks into spaces, then
+                // collapse whitespace runs
+                String descr = infoNode.getTextContent().replace("&lt;br/&gt;", " ");
+                descr = descr.replace("<br/>", " ");
+                descr = descr.replaceAll("[\t\n\r ]+", " ");
+                c.setDescription(descr.trim());
+                if ("en".equals(element.getAttribute("xml:lang"))) {
+                    enDescription = c.getDescription();
+                }
+            }
+        }
+        // description in English has priority
+        if (enDescription != null && !enDescription.isEmpty()) {
+            c.setDescription(enDescription);
+        }
+    }
+        private static final Logger LOGGER = Logger.getLogger(ScanCrawler.class.getName());
+        private CenterRegistryI cr; // registry that crawl() asks for the institutions
+        private SRUThreadedClient sruScanClient; // client handed to every scan request
+        private int maxDepth = 1; // addCorpora() stops once the incremented depth exceeds this
+        private EndpointFilter filter = null; // optional; when null, crawl() visits every endpoint
+        /**
+         * Creates a crawler that uses the field defaults for the depth limit
+         * and the endpoint filter.
+         *
+         * @param centerRegistry registry that supplies the institutions to crawl
+         * @param sruScanClient client used for the scan requests
+         */
+        public ScanCrawler(CenterRegistryI centerRegistry, SRUThreadedClient sruScanClient) {
+                this.sruScanClient = sruScanClient;
+                this.cr = centerRegistry;
+        }
+        /**
+         * Creates a crawler with an explicit endpoint filter and depth limit.
+         *
+         * @param centerRegistry registry that supplies the institutions to crawl
+         * @param sruScanClient client used for the scan requests
+         * @param filter filter applied to the endpoints of each institution;
+         * may be null, in which case no filtering takes place
+         * @param maxDepth deepest corpus level that is still scanned
+         */
+        public ScanCrawler(CenterRegistryI centerRegistry, SRUThreadedClient sruScanClient, EndpointFilter filter, int maxDepth) {
+                this(centerRegistry, sruScanClient);
+                this.filter = filter;
+                this.maxDepth = maxDepth;
+        }
+        /**
+         * Crawler of scan operation of FCS specification. Collects all the
+         * endpoints and resources descriptions into the provided cache.
+         *
+         * @param cache cache into which the endpoints and resources descriptions
+         * from scan operation responses should be collected.
+         */
+        public void crawl(ScanCache cache) {
+                List<Institution> institutions = cr.getCQLInstitutions();
+                for (Institution institution : institutions) {
+                        cache.addInstitution(institution);
+                        Iterable<Endpoint> endpoints = institution.getEndpoints();
+                        if (filter != null) {
+                                endpoints = filter.filter(endpoints);
+                        }
+                        for (Endpoint endp : endpoints) {
+                                Corpus parentCorpus = null;// i.e. it's root
+                                addCorpora(sruScanClient, endp.getUrl(), institution, 0, parentCorpus, cache);
+                        }
+                }
+        }
+        private void addCorpora(SRUThreadedClient sruScanClient, String endpointUrl,
+                        Institution institution, int depth, Corpus parentCorpus, ScanCache cache) {
+                depth++;
+                if (depth > maxDepth) {
+                        return;
+                }
+                List<Corpus> childrenCorpora = doScan(sruScanClient, endpointUrl, institution, parentCorpus);
+                for (Corpus c : childrenCorpora) {
+                        // don't add corpus that introduces cyclic references
+                        // as of March 2014, there are 2 such endpoints...
+                        if (cache.getCorpus(c.getHandle()) != null) {
+                                LOGGER.warning("Cyclic reference in corpus " + c.getHandle() + " of endpoint " + endpointUrl);
+                                continue;
+                        }
+                        cache.addCorpus(c, parentCorpus);
+                        addCorpora(sruScanClient, c.getEndpointUrl(), c.getInstitution(),
+                                        depth, c, cache);
+                }
+        }
+        public static List<Corpus> doScan(SRUThreadedClient sruScanClient,
+                        String endpointUrl, Institution institution, Corpus parentCorpus) {
+                List<Corpus> corpora = new ArrayList<Corpus>();
+                Future<SRUScanResponse> corporaResponse = null;
+                SRUScanResponse response = null;
+                try {
+                        SRUScanRequest corporaRequest = new SRUScanRequest(endpointUrl);
+                        StringBuilder scanClause = new StringBuilder(SRUCQL.SCAN_RESOURCE_PARAMETER);
+                        scanClause.append("=");
+                        String normalizedHandle = normalizeHandle(parentCorpus, parentCorpus == null);
+                        scanClause.append(normalizedHandle);
+                        corporaRequest.setScanClause(scanClause.toString());
+                        corporaRequest.setExtraRequestData(SRUCQL.SCAN_RESOURCE_INFO_PARAMETER,
+                                        SRUCQL.SCAN_RESOURCE_INFO_PARAMETER_DEFAULT_VALUE);
+                        corporaResponse = sruScanClient.scan(corporaRequest);
+                        Thread.sleep(5000);
+                        response = corporaResponse.get(600, TimeUnit.SECONDS);
+                } catch (TimeoutException ex) {
+                        LOGGER.log(Level.SEVERE, "Timeout scanning corpora {0} at {1} {2} {3}",
+                                        new String[]{Corpus.ROOT_HANDLE, endpointUrl, ex.getClass().getName(), ex.getMessage()});
+                } catch (Exception ex) {
+                        LOGGER.log(Level.SEVERE, "Error accessing corpora {0} at {1} {2} {3}",
+                                        new String[]{Corpus.ROOT_HANDLE, endpointUrl, ex.getClass().getName(), ex.getMessage()});
+                } finally {
+                        if (corporaResponse != null && !corporaResponse.isDone()) {
+                                corporaResponse.cancel(true);
+                        }
+                }
+                if (response != null && response.hasTerms()) {
+                        for (SRUTerm term : response.getTerms()) {
+                                Corpus c = createCorpus(institution, endpointUrl, term);
+                                corpora.add(c);
+                        }
+                } else {
+                        if (parentCorpus == null) { // means root
+                                // create default root corpus:
+                                Corpus c = new Corpus(institution, endpointUrl);
+                                corpora.add(c);
+                        }
+                }
+                return corpora;
+        }
+        private static String normalizeHandle(Corpus corpus, boolean root) {
+                if (root) {
+                        return Corpus.ROOT_HANDLE;
+                }
+                String handle = corpus.getHandle();
+                if (Corpus.HANDLE_WITH_SPECIAL_CHARS.matcher(handle).matches()) {
+                        //resourceValue = "%22" + resourceValue + "%22";
+                        handle = "\"" + handle + "\"";
+                }
+                return handle;
+        }
+        private static Corpus createCorpus(Institution institution, String endpointUrl, SRUTerm term) {
+                Corpus c = new Corpus(institution, endpointUrl);
+                c.setHandle(term.getValue());
+                c.setDisplayName(term.getDisplayTerm());
+                if (term.getNumberOfRecords() > 0) {
+                        c.setNumberOfRecords(term.getNumberOfRecords());
+                }
+                addExtraInfo(c, term);
+                return c;
+        }
+        // TODO: ask Oliver to add API support for the extra info in the
+        // SRU client/server libraries, so that it's not necessary to work
+        // with DocumentFragment
+        /**
+         * Copies landing page, languages and description from the extra term
+         * data of a scan term into the corpus.
+         * <p>
+         * Nothing is changed when the term has no extra data or the fragment
+         * contains no element. When several descriptions are present, the one
+         * marked {@code xml:lang="en"} wins.
+         *
+         * @param c corpus to update
+         * @param term scan term whose extra term data is read
+         */
+        private static void addExtraInfo(Corpus c, SRUTerm term) {
+                DocumentFragment extraInfo = term.getExtraTermData();
+                if (extraInfo == null) {
+                        return;
+                }
+                // The info elements sit inside one wrapper element. Skip leading
+                // non-element nodes so that an empty fragment does not cause a
+                // NullPointerException and a whitespace-led one is still read.
+                Node wrapper = extraInfo.getFirstChild();
+                while (wrapper != null && wrapper.getNodeType() != Node.ELEMENT_NODE) {
+                        wrapper = wrapper.getNextSibling();
+                }
+                if (wrapper == null) {
+                        return;
+                }
+                String enDescription = null;
+                NodeList infoNodes = wrapper.getChildNodes();
+                for (int i = 0; i < infoNodes.getLength(); i++) {
+                        Node infoNode = infoNodes.item(i);
+                        if (infoNode.getNodeType() != Node.ELEMENT_NODE) {
+                                continue;
+                        }
+                        // getLocalName() can return null, hence the constant-first equals.
+                        String name = infoNode.getLocalName();
+                        if ("LandingPageURI".equals(name)) {
+                                c.setLandingPage(infoNode.getTextContent().trim());
+                        } else if ("Languages".equals(name)) {
+                                NodeList languageNodes = infoNode.getChildNodes();
+                                for (int j = 0; j < languageNodes.getLength(); j++) {
+                                        Node languageNode = languageNodes.item(j);
+                                        if (languageNode.getNodeType() == Node.ELEMENT_NODE
+                                                        && "Language".equals(languageNode.getLocalName())) {
+                                                String languageText = languageNode.getTextContent().trim();
+                                                if (!languageText.isEmpty()) {
+                                                        c.addLanguage(languageText);
+                                                }
+                                        }
+                                }
+                        } else if ("Description".equals(name)) {
+                                Element element = (Element) infoNode;
+                                // turn escaped and literal line breaks into spaces, then
+                                // collapse whitespace runs
+                                String descr = infoNode.getTextContent().replace("&lt;br/&gt;", " ");
+                                descr = descr.replace("<br/>", " ");
+                                descr = descr.replaceAll("[\t\n\r ]+", " ");
+                                c.setDescription(descr.trim());
+                                if ("en".equals(element.getAttribute("xml:lang"))) {
+                                        enDescription = c.getDescription();
+                                }
+                        }
+                }
+                // description in English has priority
+                if (enDescription != null && !enDescription.isEmpty()) {
+                        c.setDescription(enDescription);
+                }
+        }
+}

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/cache/SimpleInMemScanCache.java

-                      r5036
+                      r5720
 package eu.clarin.sru.fcs.aggregator.cache;
 import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
 import eu.clarin.sru.fcs.aggregator.sopt.Institution;
+import eu.clarin.sru.fcs.aggregator.registry.Corpus;
+import eu.clarin.sru.fcs.aggregator.registry.Institution;
 import java.util.ArrayList;
 import java.util.HashMap;
 …
 /**
  * Implementation of the cached scan data (endpoints descriptions) that
  * stores the cache in memory in maps.
+ * Implementation of the cached scan data (endpoints descriptions) that stores
+ * the cache in memory in maps.
+ *
  * @author yanapanchenko
 …
 public class SimpleInMemScanCache implements ScanCache {
     private Map<String, List<Corpus>> enpUrlToRootCorpora = new LinkedHashMap<String, List<Corpus>>(30);
     private Map<String, List<Corpus>> corpusToChildren = new HashMap<String, List<Corpus>>();
     //private Map<String, String> childToParent = new HashMap<String, String>();
     private Map<String, Corpus> handleToCorpus = new HashMap<String, Corpus>();
     private Map<String, Set<Corpus>> langToRootCorpora = new HashMap<String, Set<Corpus>>();
     private Map<String, Set<Corpus>> langToTopUniqueCorpora = new HashMap<String, Set<Corpus>>();
     private List<Institution> institutions = new ArrayList<Institution>();
+        private Map<String, List<Corpus>> enpUrlToRootCorpora = new LinkedHashMap<String, List<Corpus>>(30);
+        private Map<String, List<Corpus>> corpusToChildren = new HashMap<String, List<Corpus>>();
+        //private Map<String, String> childToParent = new HashMap<String, String>();
+        private Map<String, Corpus> handleToCorpus = new HashMap<String, Corpus>();
+        private Map<String, Set<Corpus>> langToRootCorpora = new HashMap<String, Set<Corpus>>();
+        private Map<String, Set<Corpus>> langToTopUniqueCorpora = new HashMap<String, Set<Corpus>>();
+        private List<Institution> institutions = new ArrayList<Institution>();
+    private static final Logger LOGGER = Logger.getLogger(SimpleInMemScanCache.class.getName());
+    @Override
+    public List<Institution> getInstitutions() {
+        return institutions;
+    }
+        private static final Logger LOGGER = Logger.getLogger(SimpleInMemScanCache.class.getName());
+    @Override
+    public List<Corpus> getRootCorporaOfEndpoint(String enpointUrl) {
+        List<Corpus> roots = new ArrayList<Corpus>();
+        if (enpUrlToRootCorpora.containsKey(enpointUrl)) {
+            roots.addAll(enpUrlToRootCorpora.get(enpointUrl));
+        }
+        return roots;
+    }
+        @Override
+        public List<Institution> getInstitutions() {
+                return institutions;
+        }
+    @Override
+    public void addInstitution(Institution institution) {
+        institutions.add(institution);
+    }
+        @Override
+        public List<Corpus> getRootCorporaOfEndpoint(String enpointUrl) {
+                List<Corpus> roots = new ArrayList<Corpus>();
+                if (enpUrlToRootCorpora.containsKey(enpointUrl)) {
+                        roots.addAll(enpUrlToRootCorpora.get(enpointUrl));
+                }
+                return roots;
+        }
     @Override
     public void addCorpus(Corpus c) {
         addCorpus(c, null);
+    }
+        @Override
+        public void addInstitution(Institution institution) {
+                institutions.add(institution);
+        }
+    @Override
+    public void addCorpus(Corpus c, Corpus parentCorpus) {
+        @Override
+        public void addCorpus(Corpus c) {
+                addCorpus(c, null);
+        }
+        @Override
+        public void addCorpus(Corpus c, Corpus parentCorpus) {
         handleToCorpus.put(c.getHandle(), c);
+                handleToCorpus.put(c.getHandle(), c);
+        if (parentCorpus == null) { //i.e it's a root corpus
+            // index root corpora as for their languages
+            for (String lang : c.getLanguages()) {
+                if (!langToRootCorpora.containsKey(lang)) {
+                    langToRootCorpora.put(lang, new HashSet<Corpus>());
+                }
+                langToRootCorpora.get(lang).add(c);
+            }
+            // index root corpora by their endpoint url
+            if (!enpUrlToRootCorpora.containsKey(c.getEndpointUrl())) {
+                enpUrlToRootCorpora.put(c.getEndpointUrl(), new ArrayList<Corpus>());
+            }
+            enpUrlToRootCorpora.get(c.getEndpointUrl()).add(c);
+            //childToParent.put(c.getHandle(), Corpus.ROOT_HANDLE);
+        } else {
+            if (!corpusToChildren.containsKey(parentCorpus.getHandle())) {
+                corpusToChildren.put(parentCorpus.getHandle(), new ArrayList<Corpus>());
+            }
+            corpusToChildren.get(parentCorpus.getHandle()).add(c);
+            //childToParent.put(c.getHandle(), parentCorpus.getHandle());
+        }
+        // index top corpora with unique language as for their languages
+        if (c.getLanguages().size() == 1 &&
+                (parentCorpus == null || parentCorpus.getLanguages().size() > 0)) {
+            String lang = getElementOfStringUnitset(c.getLanguages());
+            if (!langToTopUniqueCorpora.containsKey(lang)) {
+                langToTopUniqueCorpora.put(lang, new LinkedHashSet<Corpus>());
+            }
+            langToTopUniqueCorpora.get(lang).add(c);
+        }
+    }
+                if (parentCorpus == null) { //i.e it's a root corpus
+                        // index root corpora as for their languages
+                        for (String lang : c.getLanguages()) {
+                                if (!langToRootCorpora.containsKey(lang)) {
+                                        langToRootCorpora.put(lang, new HashSet<Corpus>());
+                                }
+                                langToRootCorpora.get(lang).add(c);
+                        }
+                        // index root corpora by their endpoint url
+                        if (!enpUrlToRootCorpora.containsKey(c.getEndpointUrl())) {
+                                enpUrlToRootCorpora.put(c.getEndpointUrl(), new ArrayList<Corpus>());
+                        }
+                        enpUrlToRootCorpora.get(c.getEndpointUrl()).add(c);
+                        //childToParent.put(c.getHandle(), Corpus.ROOT_HANDLE);
+                } else {
+                        if (!corpusToChildren.containsKey(parentCorpus.getHandle())) {
+                                corpusToChildren.put(parentCorpus.getHandle(), new ArrayList<Corpus>());
+                        }
+                        corpusToChildren.get(parentCorpus.getHandle()).add(c);
+                        //childToParent.put(c.getHandle(), parentCorpus.getHandle());
+                }
+    @Override
+    public String toString() {
+        return "cache{\n" + "institutions=" + institutions + "\n"
+                + "enpUrlToRootCorpora=" + enpUrlToRootCorpora
+                + "\n corpusToChildren=" + corpusToChildren
+                + "\n langToTopUniqueCorpora=" + langToTopUniqueCorpora + "\n}";
+    }
+                // index top corpora with unique language as for their languages
+                if (c.getLanguages().size() == 1
+                                && (parentCorpus == null || parentCorpus.getLanguages().size() > 0)) {
+                        String lang = getElementOfStringUnitset(c.getLanguages());
+                        if (!langToTopUniqueCorpora.containsKey(lang)) {
+                                langToTopUniqueCorpora.put(lang, new LinkedHashSet<Corpus>());
+                        }
+                        langToTopUniqueCorpora.get(lang).add(c);
+                }
+        }
+    @Override
+    public boolean isEmpty() {
+        return enpUrlToRootCorpora.isEmpty();
+    }
+        @Override
+        public String toString() {
+                return "cache{\n" + "institutions=" + institutions + "\n"
+                                + "enpUrlToRootCorpora=" + enpUrlToRootCorpora
+                                + "\n corpusToChildren=" + corpusToChildren
+                                + "\n langToTopUniqueCorpora=" + langToTopUniqueCorpora + "\n}";
+        }
+    @Override
+    public List<Corpus> getRootCorpora() {
+        List<Corpus> rootCorpora = new ArrayList<Corpus>(enpUrlToRootCorpora.size());
+        for (List<Corpus> corpora : this.enpUrlToRootCorpora.values()) {
+            rootCorpora.addAll(corpora);
+        }
+        return rootCorpora;
+    }
+        @Override
+        public boolean isEmpty() {
+                return enpUrlToRootCorpora.isEmpty();
+        }
+    @Override
+    public Set<String> getLanguages() {
+        Set<String> languages = new HashSet<String>(this.langToRootCorpora.size());
+        languages.addAll(this.langToRootCorpora.keySet());
+        return languages;
+    }
+        @Override
+        public List<Corpus> getRootCorpora() {
+                List<Corpus> rootCorpora = new ArrayList<Corpus>(enpUrlToRootCorpora.size());
+                for (List<Corpus> corpora : this.enpUrlToRootCorpora.values()) {
+                        rootCorpora.addAll(corpora);
+                }
+                return rootCorpora;
+        }
+    @Override
+    public List<Corpus> getChildren(Corpus corpus) {
+        List<Corpus> corpora = this.corpusToChildren.get(corpus.getHandle());
+        if (corpora == null) {
+            return (new ArrayList<Corpus>());
+        } else {
+            List<Corpus> corporaCopy = new ArrayList<Corpus>(corpora);
+            return corporaCopy;
+        }
+    }
+        @Override
+        public Set<String> getLanguages() {
+                Set<String> languages = new HashSet<String>(this.langToRootCorpora.size());
+                languages.addAll(this.langToRootCorpora.keySet());
+                return languages;
+        }
+    @Override
+    public Map<String, Set<Corpus>> getRootCorporaForLang() {
+        return langToRootCorpora;
+    }
+        @Override
+        public List<Corpus> getChildren(Corpus corpus) {
+                List<Corpus> corpora = this.corpusToChildren.get(corpus.getHandle());
+                if (corpora == null) {
+                        return (new ArrayList<Corpus>());
+                } else {
+                        List<Corpus> corporaCopy = new ArrayList<Corpus>(corpora);
+                        return corporaCopy;
+                }
+        }
+    @Override
+    public List<Corpus> getRootCorporaForLang(String lang) {
+        List<Corpus> rootCorpora = new ArrayList<Corpus>(enpUrlToRootCorpora.size());
+        for (List<Corpus> corpora : this.enpUrlToRootCorpora.values()) {
+            for (Corpus corpus : corpora) {
+                if (corpus.getLanguages().contains(lang)) {
+                    rootCorpora.add(corpus);
+                }
+            }
+        }
+        return rootCorpora;
+    }
+        @Override
+        public Map<String, Set<Corpus>> getRootCorporaForLang() {
+                return langToRootCorpora;
+        }
+    @Override
+    public Map<String, Set<Corpus>> getTopUniqueLangToCorpora() {
+        return this.langToTopUniqueCorpora;
+    }
+        @Override
+        public List<Corpus> getRootCorporaForLang(String lang) {
+                List<Corpus> rootCorpora = new ArrayList<Corpus>(enpUrlToRootCorpora.size());
+                for (List<Corpus> corpora : this.enpUrlToRootCorpora.values()) {
+                        for (Corpus corpus : corpora) {
+                                if (corpus.getLanguages().contains(lang)) {
+                                        rootCorpora.add(corpus);
+                                }
+                        }
+                }
+                return rootCorpora;
+        }
+    @Override
+    public List<Corpus> getTopUniqueLanguageCorpora(String lang) {
+        ArrayList<Corpus> corpora = new ArrayList<Corpus>(langToTopUniqueCorpora.get(lang).size());
+        corpora.addAll(langToTopUniqueCorpora.get(lang));
+        return corpora;
+    }
+        @Override
+        public Map<String, Set<Corpus>> getTopUniqueLangToCorpora() {
+                return this.langToTopUniqueCorpora;
+        }
+    @Override
+    public Corpus getCorpus(String handle) {
+        return this.handleToCorpus.get(handle);
+    }
+        @Override
+        public List<Corpus> getTopUniqueLanguageCorpora(String lang) {
+                ArrayList<Corpus> corpora = new ArrayList<Corpus>(langToTopUniqueCorpora.get(lang).size());
+                corpora.addAll(langToTopUniqueCorpora.get(lang));
+                return corpora;
+        }
+    private String getElementOfStringUnitset(Set<String> stringUnitSet) {
+        return stringUnitSet.iterator().next();
+    }
+        @Override
+        public Corpus getCorpus(String handle) {
+                return this.handleToCorpus.get(handle);
+        }
+        private String getElementOfStringUnitset(Set<String> stringUnitSet) {
+                return stringUnitSet.iterator().next();
+        }
+}

SRUAggregator/trunk/src/main/java/eu/clarin/sru/fcs/aggregator/util/SRUCQL.java

-                      r5041
+                      r5720
 /**
  * Utility for storing constants related to SRU/CQL specification.
+ *
+ *
  * @author Yana Panchenko
  */
 public class SRUCQL {
+    public static final String OPERATION = "operation";
+    public static final String VERSION = "version";
+    public static final String SEARCH_RETRIEVE = "searchRetrieve";
+    public static final String SEARCH_CORPUS_HANDLE_PARAMETER = "x-cmd-context";
+    public static final String SEARCH_QUERY_PARAMETER = "query";
+    public static final String SCAN = "scan";
+    public static final String SCAN_RESOURCE_PARAMETER = "fcs.resource";
+    public static final String SCAN_RESOURCE_PARAMETER_DEFAULT_VALUE = "root";
+    public static final String SCAN_RESOURCE_INFO_PARAMETER = "x-cmd-resource-info";
+    public static final String SCAN_RESOURCE_INFO_PARAMETER_DEFAULT_VALUE = "true";
+    public static final String EXPLAIN = "explain";
+    public static final String AGGREGATION_CONTEXT = "x-aggregation-context";
+        public static final String VERSION = "version";
+        public static final String SEARCH_RETRIEVE = "searchRetrieve";
+        public static final String SEARCH_CORPUS_HANDLE_PARAMETER = "x-cmd-context";
+        public static final String SEARCH_QUERY_PARAMETER = "query";
+        public static final String SCAN = "scan";
+        public static final String SCAN_RESOURCE_PARAMETER = "fcs.resource";
+        public static final String SCAN_RESOURCE_PARAMETER_DEFAULT_VALUE = "root";
+        public static final String SCAN_RESOURCE_INFO_PARAMETER = "x-cmd-resource-info";
+        public static final String SCAN_RESOURCE_INFO_PARAMETER_DEFAULT_VALUE = "true";
+        public static final String EXPLAIN = "explain";
+        public static final String AGGREGATION_CONTEXT = "x-aggregation-context";
+}

SRUAggregator/trunk/src/main/webapp/META-INF/context.xml

r2450	r5720
1	1	<?xml version="1.0" encoding="UTF-8"?>
2		<Context antiJARLocking="true" path="/~~aggregator~~"/>
	2	<Context antiJARLocking="true" path="/Aggregator2"/>

SRUAggregator/trunk/src/main/webapp/WEB-INF/web.xml

-                      r5041
+                      r5720
 <?xml version="1.0" encoding="UTF-8"?>
 <web-app version="3.0" xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_3_0.xsd">
+    <description>CLARIN-D Federated Content Search Aggregator</description>
+    <display-name>CLARIN-D Federated Content Search Aggregator</display-name>
+    <env-entry>
+        <env-entry-name>center-registry-url</env-entry-name>
+        <env-entry-type>java.lang.String</env-entry-type>
+        <env-entry-value>http://centerregistry-clarin.esc.rzg.mpg.de/restxml/</env-entry-value>
+    </env-entry>
+    <env-entry>
+        <env-entry-name>weblicht-url</env-entry-name>
+        <env-entry-type>java.lang.String</env-entry-type>
+        <env-entry-value>https://weblicht.sfs.uni-tuebingen.de/WebLicht-4/?input=</env-entry-value>
+    </env-entry>
+    <env-entry>
+        <env-entry-name>update-interval-unit</env-entry-name>
+        <env-entry-type>java.lang.String</env-entry-type>
+        <env-entry-value>HOURS</env-entry-value>
+    </env-entry>
+    <env-entry>
+        <env-entry-name>update-interval</env-entry-name>
+        <env-entry-type>java.lang.Integer</env-entry-type>
+        <env-entry-value>6</env-entry-value>
+    </env-entry>
+    <env-entry>
+        <env-entry-name>scan-max-depth</env-entry-name>
+        <env-entry-type>java.lang.Integer</env-entry-type>
+        <env-entry-value>3</env-entry-value>
+    </env-entry>
+    <!-- Value of this property (data.location) should be specified in CATALINA_OPTS
+         (e.g. inside /etc/init.d/tomcat7*), unless the default is used.
+         Currently defaults to /data/ or user.home -->
+    <env-entry>
+        <env-entry-name>data-location-property</env-entry-name>
+        <env-entry-type>java.lang.String</env-entry-type>
+        <env-entry-value>data.location</env-entry-value>
+    </env-entry>
+    <!-- Folder for the data specific to the current Aggregator application,
+         supposed to be inside the data location folder above  -->
+    <env-entry>
+        <env-entry-name>aggregator-folder</env-entry-name>
+        <env-entry-type>java.lang.String</env-entry-type>
+        <env-entry-value>fcsAggregator</env-entry-value>
+    </env-entry>
+    <listener>
+        <description>ZK listener for session cleanup</description>
+        <listener-class>org.zkoss.zk.ui.http.HttpSessionListener</listener-class>
+    </listener>
+    <servlet>
+        <description>ZK loader for ZUML pages</description>
+        <servlet-name>zkLoader</servlet-name>
+        <servlet-class>org.zkoss.zk.ui.http.DHtmlLayoutServlet</servlet-class>
+        <init-param>
+            <param-name>update-uri</param-name>
+            <param-value>/zkau</param-value>
+        </init-param>
+        <load-on-startup>1</load-on-startup>
+    </servlet>
+    <servlet-mapping>
+        <servlet-name>zkLoader</servlet-name>
+        <url-pattern>*.zul</url-pattern>
+    </servlet-mapping>
+    <servlet-mapping>
+        <servlet-name>zkLoader</servlet-name>
+        <url-pattern>*.zhtml</url-pattern>
+    </servlet-mapping>
+    <!-- Optional. Uncomment it if you want to use richlets.
+    <servlet-mapping>
+        <servlet-name>zkLoader</servlet-name>
+        <url-pattern>/zk/*</url-pattern>
+    </servlet-mapping>
+    -->
+    <servlet>
+        <description>The asynchronous update engine for ZK</description>
+        <servlet-name>auEngine</servlet-name>
+        <servlet-class>org.zkoss.zk.au.http.DHtmlUpdateServlet</servlet-class>
+    </servlet>
+    <servlet-mapping>
+        <servlet-name>auEngine</servlet-name>
+        <url-pattern>/zkau/*</url-pattern>
+    </servlet-mapping>
+    <servlet>
+        <servlet-name>ServletAdaptor</servlet-name>
+        <servlet-class>com.sun.jersey.spi.container.servlet.ServletContainer</servlet-class>
+         <init-param>
+         <param-name>javax.ws.rs.Application</param-name>
+        <param-value>eu.clarin.sru.fcs.aggregator.rest.AggregatorService</param-value>
+        </init-param>
+        <load-on-startup>1</load-on-startup>
+    </servlet>
+    <servlet-mapping>
+        <servlet-name>ServletAdaptor</servlet-name>
+        <url-pattern>/service/*</url-pattern>
+    </servlet-mapping>
+    <welcome-file-list>
+        <welcome-file>index.zul</welcome-file>
+        <welcome-file>index.zhtml</welcome-file>
+        <welcome-file>index.html</welcome-file>
+        <welcome-file>index.htm</welcome-file>
+    </welcome-file-list>
+        <description>CLARIN-D Federated Content Search Aggregator</description>
+        <display-name>CLARIN-D Federated Content Search Aggregator</display-name>
+        <env-entry>
+                <env-entry-name>center-registry-url</env-entry-name>
+                <env-entry-type>java.lang.String</env-entry-type>
+                <env-entry-value>http://centerregistry-clarin.esc.rzg.mpg.de/restxml/</env-entry-value>
+        </env-entry>
+        <env-entry>
+                <env-entry-name>weblicht-url</env-entry-name>
+                <env-entry-type>java.lang.String</env-entry-type>
+                <env-entry-value>https://weblicht.sfs.uni-tuebingen.de/WebLicht-4/?input=</env-entry-value>
+        </env-entry>
+        <env-entry>
+                <env-entry-name>update-interval-unit</env-entry-name>
+                <env-entry-type>java.lang.String</env-entry-type>
+                <env-entry-value>HOURS</env-entry-value>
+        </env-entry>
+        <env-entry>
+                <env-entry-name>update-interval</env-entry-name>
+                <env-entry-type>java.lang.Integer</env-entry-type>
+                <env-entry-value>6</env-entry-value>
+        </env-entry>
+        <env-entry>
+                <env-entry-name>scan-max-depth</env-entry-name>
+                <env-entry-type>java.lang.Integer</env-entry-type>
+                <env-entry-value>3</env-entry-value>
+        </env-entry>
+        <!-- Value of this property (data.location) should be specified in CATALINA_OPTS
+        (e.g. inside /etc/init.d/tomcat7*), unless the default is used.
+        Currently defaults to /data/ or user.home -->
+        <env-entry>
+                <env-entry-name>data-location-property</env-entry-name>
+                <env-entry-type>java.lang.String</env-entry-type>
+                <env-entry-value>data.location</env-entry-value>
+        </env-entry>
+        <!-- Folder for the data specific to the current Aggregator application,
+        supposed to be inside the data location folder above  -->
+        <env-entry>
+                <env-entry-name>aggregator-folder</env-entry-name>
+                <env-entry-type>java.lang.String</env-entry-type>
+                <env-entry-value>fcsAggregator</env-entry-value>
+        </env-entry>
+        <servlet>
+                <servlet-name>Jersey REST Service</servlet-name>
+                <servlet-class>com.sun.jersey.spi.container.servlet.ServletContainer</servlet-class>
+                <init-param>
+                        <param-name>com.sun.jersey.config.property.packages</param-name>
+                        <param-value>eu.clarin.sru.fcs.aggregator.rest;org.codehaus.jackson.jaxrs</param-value>
+                </init-param>
+                <load-on-startup>1</load-on-startup>
+        </servlet>
+        <servlet-mapping>
+                <servlet-name>Jersey REST Service</servlet-name>
+                <url-pattern>/rest/*</url-pattern>
+        </servlet-mapping>
+        <session-config>
+                <session-timeout>30</session-timeout>
+        </session-config>
+        <welcome-file-list>
+                <welcome-file>index.html</welcome-file>
+        </welcome-file-list>
+        <listener>
+                <listener-class>eu.clarin.sru.fcs.aggregator.app.Aggregator</listener-class>
+        </listener>
 </web-app>

SRUAggregator/trunk/src/test/java/eu/clarin/sru/fcs/aggregator/app/ScanCacheFileTest.java

-                      r5701
+                      r5720
 package eu.clarin.sru.fcs.aggregator.app;
-import eu.clarin.sru.fcs.aggregator.cache.ScanCacheFiled;
-import eu.clarin.sru.fcs.aggregator.cache.SimpleInMemScanCache;
 import eu.clarin.sru.fcs.aggregator.cache.ScanCache;
+import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
+import eu.clarin.sru.fcs.aggregator.sopt.Endpoint;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCacheFile;
+import eu.clarin.sru.fcs.aggregator.registry.Corpus;
+import eu.clarin.sru.fcs.aggregator.registry.Endpoint;
 import java.io.File;
 import java.util.List;
 import org.junit.Assert;
+import org.junit.Ignore;
 import org.junit.Test;
 …
  * @author yanapanchenko
  */
+public class ScanCacheFiledTest {
+    @Test
+    public void testReadWriteDepth1() {
+        String scanDir = "/scan-bas";
+        String scanPath1 = this.getClass().getResource(scanDir).getFile();
+        String scanPath2 = "/tmp/scan-bas";
+        File scanDir2 = new File(scanPath2);
+        if (!scanDir2.exists()) {
+            scanDir2.mkdir();
+        }
+        ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
+        ScanCache cacheOrig = scanFiled1.read();
+        ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
+        scanFiled2.write(cacheOrig);
+        ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
+        ScanCache cacheRewritten = scanFiled3.read();
+        //make sure caches contain the same info after read-write
+        Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
+        Endpoint epOrig = cacheOrig.getInstitutions().get(2).getEndpoint(0);
+        Endpoint epRewritten = cacheRewritten.getInstitutions().get(2).getEndpoint(0);
+        Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
+        Assert.assertEquals(epOrig, epRewritten);
+        List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
+        List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
+        Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
+        Assert.assertEquals(3, rootCorporaRewritten.size());
+        Assert.assertEquals(rootCorporaOrig.get(0), rootCorporaRewritten.get(0));
+        List<Corpus> childenOrig = cacheOrig.getChildren(rootCorporaOrig.get(0));
+        List<Corpus> childenRewritten = cacheRewritten.getChildren(rootCorporaOrig.get(0));
+        Assert.assertEquals(childenOrig, childenRewritten);
+        Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
+@Ignore
+public class ScanCacheFileTest {
+        @Test
+        public void testReadWriteDepth1() {
+                String scanDir = "/scan-bas";
+                String scanPath1 = this.getClass().getResource(scanDir).getFile();
+                String scanPath2 = "/tmp/scan-bas";
+                File scanDir2 = new File(scanPath2);
+                if (!scanDir2.exists()) {
+                        scanDir2.mkdir();
+                }
+                ScanCacheFile scanFiled1 = new ScanCacheFile(scanPath1);
+                ScanCache cacheOrig = scanFiled1.read();
+                ScanCacheFile scanFiled2 = new ScanCacheFile(scanPath2);
+                scanFiled2.write(cacheOrig);
+                ScanCacheFile scanFiled3 = new ScanCacheFile(scanPath2);
+                ScanCache cacheRewritten = scanFiled3.read();
+                //make sure caches contain the same info after read-write
+                Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
+                Endpoint epOrig = cacheOrig.getInstitutions().get(2).getEndpoint(0);
+                Endpoint epRewritten = cacheRewritten.getInstitutions().get(2).getEndpoint(0);
+                Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
+                Assert.assertEquals(epOrig, epRewritten);
+                List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
+                List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
+                Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
+                Assert.assertEquals(3, rootCorporaRewritten.size());
+                Assert.assertEquals(rootCorporaOrig.get(0), rootCorporaRewritten.get(0));
+                List<Corpus> childenOrig = cacheOrig.getChildren(rootCorporaOrig.get(0));
+                List<Corpus> childenRewritten = cacheRewritten.getChildren(rootCorporaOrig.get(0));
+                Assert.assertEquals(childenOrig, childenRewritten);
+                Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
         //System.out.println(cacheOrig);
         //System.out.println();
         //System.out.println(cacheRewritten);
+    }
     @Test
     public void testReadWriteDepth2() {
         String scanDir = "/scan-mpi";
         String scanPath1 = this.getClass().getResource(scanDir).getFile();
         String scanPath2 = "/tmp/scan-mpi";
         File scanDir2 = new File(scanPath2);
         if (!scanDir2.exists()) {
             scanDir2.mkdir();
+        }
         ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
         ScanCache cacheOrig = scanFiled1.read();
         ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
         scanFiled2.write(cacheOrig);
         ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
         ScanCache cacheRewritten = scanFiled3.read();
         //make sure caches contain the same info after read-write
         Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
         Endpoint epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
         Endpoint epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
         Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
         Assert.assertEquals(epOrig, epRewritten);
         List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
         List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
         Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
         Assert.assertEquals(3, rootCorporaRewritten.size());
         Assert.assertEquals(rootCorporaOrig.get(0), rootCorporaRewritten.get(0));
         List<Corpus> childenOrig = cacheOrig.getChildren(rootCorporaOrig.get(0));
         List<Corpus> childenRewritten = cacheRewritten.getChildren(rootCorporaOrig.get(0));
         Assert.assertEquals(childenOrig, childenRewritten);
         Assert.assertEquals(2, childenRewritten.size());
         Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
+                //System.out.println();
+                //System.out.println(cacheRewritten);
+        }
+        @Test
+        public void testReadWriteDepth2() {
+                String scanDir = "/scan-mpi";
+                String scanPath1 = this.getClass().getResource(scanDir).getFile();
+                String scanPath2 = "/tmp/scan-mpi";
+                File scanDir2 = new File(scanPath2);
+                if (!scanDir2.exists()) {
+                        scanDir2.mkdir();
+                }
+                ScanCacheFile scanFiled1 = new ScanCacheFile(scanPath1);
+                ScanCache cacheOrig = scanFiled1.read();
+                ScanCacheFile scanFiled2 = new ScanCacheFile(scanPath2);
+                scanFiled2.write(cacheOrig);
+                ScanCacheFile scanFiled3 = new ScanCacheFile(scanPath2);
+                ScanCache cacheRewritten = scanFiled3.read();
+                //make sure caches contain the same info after read-write
+                Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
+                Endpoint epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
+                Endpoint epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
+                Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
+                Assert.assertEquals(epOrig, epRewritten);
+                List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
+                List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
+                Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
+                Assert.assertEquals(3, rootCorporaRewritten.size());
+                Assert.assertEquals(rootCorporaOrig.get(0), rootCorporaRewritten.get(0));
+                List<Corpus> childenOrig = cacheOrig.getChildren(rootCorporaOrig.get(0));
+                List<Corpus> childenRewritten = cacheRewritten.getChildren(rootCorporaOrig.get(0));
+                Assert.assertEquals(childenOrig, childenRewritten);
+                Assert.assertEquals(2, childenRewritten.size());
+                Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
 //        System.out.println(cacheOrig);
 //        System.out.println();
 //        System.out.println(cacheRewritten);
+    }
         @Test
     public void testReadWriteDefaultCorpus() {
         String scanDir = "/scan-def";
         String scanPath1 = this.getClass().getResource(scanDir).getFile();
         String scanPath2 = "/tmp/scan-def";
         File scanDir2 = new File(scanPath2);
         if (!scanDir2.exists()) {
             scanDir2.mkdir();
+        }
         ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
         ScanCache cacheOrig = scanFiled1.read();
         ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
         scanFiled2.write(cacheOrig);
         ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
         ScanCache cacheRewritten = scanFiled3.read();
         //make sure caches contain the same info after read-write
         Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
         Endpoint epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
         Endpoint epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
         Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
         Assert.assertEquals(epOrig, epRewritten);
         List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
         List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
         Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
         Assert.assertEquals(1, rootCorporaRewritten.size());
         Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
+        }
+        @Test
+        public void testReadWriteDefaultCorpus() {
+                String scanDir = "/scan-def";
+                String scanPath1 = this.getClass().getResource(scanDir).getFile();
+                String scanPath2 = "/tmp/scan-def";
+                File scanDir2 = new File(scanPath2);
+                if (!scanDir2.exists()) {
+                        scanDir2.mkdir();
+                }
+                ScanCacheFile scanFiled1 = new ScanCacheFile(scanPath1);
+                ScanCache cacheOrig = scanFiled1.read();
+                ScanCacheFile scanFiled2 = new ScanCacheFile(scanPath2);
+                scanFiled2.write(cacheOrig);
+                ScanCacheFile scanFiled3 = new ScanCacheFile(scanPath2);
+                ScanCache cacheRewritten = scanFiled3.read();
+                //make sure caches contain the same info after read-write
+                Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
+                Endpoint epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
+                Endpoint epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
+                Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
+                Assert.assertEquals(epOrig, epRewritten);
+                List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
+                List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
+                Assert.assertEquals(rootCorporaOrig.size(), rootCorporaRewritten.size());
+                Assert.assertEquals(1, rootCorporaRewritten.size());
+                Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
 //        System.out.println(cacheOrig);
 //        System.out.println();
 //        System.out.println(cacheRewritten);
+    }
         @Test
     public void testReadWrite2Endpoints() {
         String scanDir = "/scan-2ep";
         String scanPath1 = this.getClass().getResource(scanDir).getFile();
         String scanPath2 = "/tmp/scan-2ep";
         File scanDir2 = new File(scanPath2);
         if (!scanDir2.exists()) {
             scanDir2.mkdir();
+        }
         ScanCacheFiled scanFiled1 = new ScanCacheFiled(scanPath1);
         ScanCache cacheOrig = scanFiled1.read();
         ScanCacheFiled scanFiled2 = new ScanCacheFiled(scanPath2);
         scanFiled2.write(cacheOrig);
         ScanCacheFiled scanFiled3 = new ScanCacheFiled(scanPath2);
         ScanCache cacheRewritten = scanFiled3.read();
         //make sure caches contain the same info after read-write
         Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
         Assert.assertEquals(cacheOrig.getRootCorpora().size(), cacheRewritten.getRootCorpora().size());
         Endpoint epOrig = cacheOrig.getInstitutions().get(2).getEndpoint(0);
         Endpoint epRewritten = cacheRewritten.getInstitutions().get(2).getEndpoint(0);
         Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
         Assert.assertEquals(epOrig, epRewritten);
         epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
         epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
         Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
         Assert.assertEquals(epOrig, epRewritten);
+        }
+        @Test
+        public void testReadWrite2Endpoints() {
+                String scanDir = "/scan-2ep";
+                String scanPath1 = this.getClass().getResource(scanDir).getFile();
+                String scanPath2 = "/tmp/scan-2ep";
+                File scanDir2 = new File(scanPath2);
+                if (!scanDir2.exists()) {
+                        scanDir2.mkdir();
+                }
+                ScanCacheFile scanFiled1 = new ScanCacheFile(scanPath1);
+                ScanCache cacheOrig = scanFiled1.read();
+                ScanCacheFile scanFiled2 = new ScanCacheFile(scanPath2);
+                scanFiled2.write(cacheOrig);
+                ScanCacheFile scanFiled3 = new ScanCacheFile(scanPath2);
+                ScanCache cacheRewritten = scanFiled3.read();
+                //make sure caches contain the same info after read-write
+                Assert.assertEquals(cacheOrig.getInstitutions().size(), cacheRewritten.getInstitutions().size());
+                Assert.assertEquals(cacheOrig.getRootCorpora().size(), cacheRewritten.getRootCorpora().size());
+                Endpoint epOrig = cacheOrig.getInstitutions().get(2).getEndpoint(0);
+                Endpoint epRewritten = cacheRewritten.getInstitutions().get(2).getEndpoint(0);
+                Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
+                Assert.assertEquals(epOrig, epRewritten);
+                epOrig = cacheOrig.getInstitutions().get(4).getEndpoint(0);
+                epRewritten = cacheRewritten.getInstitutions().get(4).getEndpoint(0);
+                Assert.assertEquals(epOrig.getUrl(), epRewritten.getUrl());
+                Assert.assertEquals(epOrig, epRewritten);
 //        List<Corpus> rootCorporaOrig = cacheOrig.getRootCorporaOfEndpoint(epOrig.getUrl());
 //        List<Corpus> rootCorporaRewritten = cacheRewritten.getRootCorporaOfEndpoint(epOrig.getUrl());
 …
 //        Assert.assertEquals(childenOrig, childenRewritten);
 //        Assert.assertEquals(rootCorporaOrig.get(0).getLanguages(), rootCorporaRewritten.get(0).getLanguages());
         //System.out.println(cacheOrig);
         //System.out.println();
         //System.out.println(cacheRewritten);
+    }
+                //System.out.println();
+                //System.out.println(cacheRewritten);
+        }
+}

SRUAggregator/trunk/src/test/java/eu/clarin/sru/fcs/aggregator/app/ScanCrawlerTest.java

-                      r5037
+                      r5720
 import eu.clarin.sru.client.SRUThreadedClient;
 import eu.clarin.sru.fcs.aggregator.cache.EndpointUrlFilter;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCrawler;
 import eu.clarin.sru.fcs.aggregator.cache.SimpleInMemScanCache;
+import eu.clarin.sru.fcs.aggregator.cache.ScanCrawler;
+import eu.clarin.sru.fcs.aggregator.sopt.CenterRegistryLive;
+import eu.clarin.sru.fcs.aggregator.sopt.Corpus;
+import eu.clarin.sru.fcs.aggregator.registry.CenterRegistryLive;
+import eu.clarin.sru.fcs.aggregator.registry.Corpus;
 import java.util.HashSet;
 import java.util.Set;
 import org.junit.Assert;
+import org.junit.Ignore;
 import org.junit.Test;
 …
  * @author yanapanchenko
  */
+@Ignore
 public class ScanCrawlerTest {
+//    @Test
+//    public void testCrawlForMpiAndTue() {
+//
+//        SRUThreadedClient sruClient = new SRUThreadedClient();
+//
+//        try {
+//            EndpointUrlFilter filter = new EndpointUrlFilter();
+//            //filter.urlShouldContainAnyOf("leipzig", ".mpi.nl");
+//            filter.urlShouldContainAnyOf("uni-tuebingen.de", ".mpi.nl");
+//            //filter.urlShouldContainAnyOf("dspin.dwds.de", "lindat.");
+//            ScanCrawler crawler = new ScanCrawler(new CenterRegistryLive(), sruClient, filter, 2);
+//            SimpleInMemScanCache cache = new SimpleInMemScanCache();
+//            crawler.crawl(cache);
+//            Corpus tueRootCorpus = cache.getRootCorporaOfEndpoint("http://weblicht.sfs.uni-tuebingen.de/rws/sru/").get(0);
+//            Corpus mpiRootCorpus = cache.getRootCorporaOfEndpoint("http://cqlservlet.mpi.nl/").get(0);
+//            Assert.assertEquals("http://hdl.handle.net/11858/00-1778-0000-0001-DDAF-D",
+//                    tueRootCorpus.getHandle());
+//            Corpus mpiCorpus = cache.getCorpus("hdl:1839/00-0000-0000-0001-53A5-2@format=cmdi");
+//            Assert.assertEquals("hdl:1839/00-0000-0000-0003-4692-D@format=cmdi", cache.getChildren(mpiCorpus).get(0).getHandle());
+//            //check if languages and other corpus data is crawled correctly...
+//            Set<String> tueLangs = new HashSet<String>();
+//            tueLangs.add("deu");
+//            Assert.assertEquals(tueLangs, tueRootCorpus.getLanguages());
+//            String tueDescSubstring = "Tübingen Treebank";
+//            Assert.assertTrue("Description problem", tueRootCorpus.getDescription().contains(tueDescSubstring));
+//            String tueNameSubstring = "TuebaDDC";
+//            Assert.assertTrue("Name problem", tueRootCorpus.getDisplayName().contains(tueNameSubstring));
+//            String tuePageSubstring = "sfs.uni-tuebingen.de";
+//            Assert.assertTrue("Landing page problem", tueRootCorpus.getLandingPage().contains(tuePageSubstring));
+//            Assert.assertTrue("Number of records problem", mpiRootCorpus.getNumberOfRecords() > 10);
+//
+//        } finally {
+//            sruClient.shutdown();
+//        }
+//
+//    }
+        @Test
+        public void testCrawlForMpiAndTue() {
+                SRUThreadedClient sruClient = new SRUThreadedClient();
+                try {
+                        EndpointUrlFilter filter = new EndpointUrlFilter(
+                                        "uni-tuebingen.de", ".mpi.nl" //, "leipzig", ".mpi.nl", "dspin.dwds.de", "lindat."
+                        );
+                        ScanCrawler crawler = new ScanCrawler(new CenterRegistryLive(), sruClient, filter, 2);
+                        SimpleInMemScanCache cache = new SimpleInMemScanCache();
+                        crawler.crawl(cache);
+                        Corpus tueRootCorpus = cache.getRootCorporaOfEndpoint("http://weblicht.sfs.uni-tuebingen.de/rws/sru/").get(0);
+                        Corpus mpiRootCorpus = cache.getRootCorporaOfEndpoint("http://cqlservlet.mpi.nl/").get(0);
+                        Assert.assertEquals("http://hdl.handle.net/11858/00-1778-0000-0001-DDAF-D",
+                                        tueRootCorpus.getHandle());
+                        Corpus mpiCorpus = cache.getCorpus("hdl:1839/00-0000-0000-0001-53A5-2@format=cmdi");
+                        Assert.assertEquals("hdl:1839/00-0000-0000-0003-4692-D@format=cmdi", cache.getChildren(mpiCorpus).get(0).getHandle());
+                        //check if languages and other corpus data is crawled correctly...
+                        Set<String> tueLangs = new HashSet<String>();
+                        tueLangs.add("deu");
+                        Assert.assertEquals(tueLangs, tueRootCorpus.getLanguages());
+                        String tueDescSubstring = "Tübingen Treebank";
+                        Assert.assertTrue("Description problem", tueRootCorpus.getDescription().contains(tueDescSubstring));
+                        String tueNameSubstring = "TuebaDDC";
+                        Assert.assertTrue("Name problem", tueRootCorpus.getDisplayName().contains(tueNameSubstring));
+                        String tuePageSubstring = "sfs.uni-tuebingen.de";
+                        Assert.assertTrue("Landing page problem", tueRootCorpus.getLandingPage().contains(tuePageSubstring));
+                        Assert.assertTrue("Number of records problem", mpiRootCorpus.getNumberOfRecords() > 10);
+                } finally {
+                        sruClient.shutdown();
+                }
+        }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 5720 for SRUAggregator

Legend:

Download in other formats: