Changeset 801


Ignore:
Timestamp:
10/18/10 08:28:46 (14 years ago)
Author:
patdui
Message:
  • got some more config stuff out of the code
  • Fixed lots of minor things
Location:
vlo/trunk/vlo_webapp
Files:
3 added
15 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo_webapp/pom.xml

    r758 r801  
    7070    </dependency>
    7171    <dependency>
    72         <groupId>org.springframework</groupId>
    73         <artifactId>spring-context</artifactId>
    74         <version>2.5.6</version>
     72      <groupId>org.springframework</groupId>
     73      <artifactId>spring-context</artifactId>
     74      <version>2.5.6</version>
    7575    </dependency>
    7676
     
    9292        <filtering>true</filtering>
    9393        <directory>src/main/resources</directory>
     94        <excludes>
     95          <exclude>importerConfig.xml</exclude> <!-- Not needed for the normal build; used in the Importer and tests -->
     96        </excludes>
    9497      </resource>
    9598      <resource>
     
    116119      </plugin>
    117120      <!--
    118         Assembly helper to create the Importer tool a command line tool which can help fill in a solr server with
    119         cmdi files. Build the tool with: mvn -Dpackaging=jar -Dmaven.test.skip=true assembly:assembly
    120         This creates a in "target" a vlo_solr-1.0-SNAPSHOT-Importer directory with a bin and share
    121         directory. You can start the tool from the bin dir.
     121        Assembly helper to create the Importer tool, a command line tool which can help fill in a solr server with cmdi
     122        files. Build the tool with: mvn -Dpackaging=jar -Dmaven.test.skip=true assembly:assembly This creates in
     123        "target" a vlo_solr-1.0-SNAPSHOT-Importer directory with a bin and share directory. You can start the tool from
     124        the bin dir.
    122125      -->
    123126      <plugin>
     
    131134          </descriptors>
    132135        </configuration>
    133       </plugin>     
     136      </plugin>
    134137    </plugins>
    135138  </build>
  • vlo/trunk/vlo_webapp/src/main/assembly/importer.xml

    r758 r801  
    11<?xml version="1.0" encoding="UTF-8"?>
    22<assembly>
    3     <id>importer</id>
    4     <formats>
    5         <format>dir</format>
    6     </formats>
    7     <includeBaseDirectory>false</includeBaseDirectory>
    8     <dependencySets>
    9       <dependencySet>
    10         <useDefaultExcludes/>
    11         <outputDirectory>/share</outputDirectory>
    12       </dependencySet>
    13     </dependencySets>
    14     <fileSets>
    15         <fileSet>
    16             <directory>src/main/bin</directory>
    17             <includes>
    18                 <include>log4j.properties</include> <!-- Copy log4j to the bin dir to make it a bit easier to change values in it -->
    19             </includes>
    20             <outputDirectory>bin</outputDirectory>
    21         </fileSet>
    22         <fileSet>
    23             <directory>src/main/bin</directory>
    24             <includes>
    25                 <include>*.sh</include>
    26             </includes>
    27             <outputDirectory>bin</outputDirectory>
    28             <fileMode>0755</fileMode>
    29         </fileSet>
    30     </fileSets>
     3  <id>importer</id>
     4  <formats>
     5    <format>dir</format>
     6  </formats>
     7  <includeBaseDirectory>false</includeBaseDirectory>
     8  <dependencySets>
     9    <dependencySet>
     10      <useDefaultExcludes />
     11      <outputDirectory>/share</outputDirectory>
     12    </dependencySet>
     13  </dependencySets>
     14  <fileSets>
     15    <fileSet>
     16      <directory>src/main/bin</directory>
     17      <includes>
     18        <include>log4j.properties</include> <!-- Copy log4j to the bin dir to make it a bit easier to change values in it -->
     19      </includes>
     20      <outputDirectory>bin</outputDirectory>
     21    </fileSet>
     22    <fileSet>
     23      <directory>src/main/bin</directory>
     24      <includes>
     25        <include>*.sh</include>
     26      </includes>
     27      <outputDirectory>bin</outputDirectory>
     28      <fileMode>0755</fileMode>
     29    </fileSet>
     30    <fileSet>
     31      <directory>src/main/resources</directory>
     32      <includes>
     33        <include>importerConfig.xml</include>
     34      </includes>
     35      <outputDirectory>bin</outputDirectory>
     36    </fileSet>
     37  </fileSets>
    3138</assembly>
  • vlo/trunk/vlo_webapp/src/main/bin/vlo_solr_importer.sh

    r758 r801  
    22
    33LIB=../share
    4 CLASSPATH=.:./log4j.properties
     4CLASSPATH=.:./log4j.properties:./importerConfig.xml
    55JAVA=java
    66
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIData.java

    r750 r801  
    2121            doc = new SolrInputDocument();
    2222        }
    23         doc.addField(name, value);
     23        if (value != null && !value.isEmpty()) {
     24            doc.addField(name, value);
     25        }
    2426    }
    2527
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIDigester.java

    r750 r801  
    44import java.io.FileInputStream;
    55import java.io.IOException;
     6import java.util.Map;
    67
    78import org.apache.commons.digester.Digester;
     
    1112import org.xml.sax.helpers.XMLReaderFactory;
    1213
    13 
    1414public class CMDIDigester {
    1515
    1616    private Digester digester;
     17    private final FacetMapping facetMapping;
    1718
    18     public CMDIDigester() {
     19    public CMDIDigester(FacetMapping facetMapping) {
     20        this.facetMapping = facetMapping;
    1921        try {
    2022            digester = createDigester();
     
    3739
    3840        digester.addObjectCreate("CMD", CMDIData.class);
    39         digester.addBeanPropertySetter("CMD/Header/MdSelfLink", "id");
     41        digester.addBeanPropertySetter(facetMapping.getIdMapping(), "id");
    4042        digester.addCallMethod("CMD/Resources/ResourceProxyList/ResourceProxy/", "addResource", 2);
    4143        digester.addCallParam("CMD/Resources/ResourceProxyList/ResourceProxy/ResourceRef", 0);
    4244        digester.addCallParam("CMD/Resources/ResourceProxyList/ResourceProxy/ResourceType", 1);
    43         matchDocumentField(digester, "CMD/Components/Session/Name", "name");
    44         matchDocumentField(digester, "CMD/Components/Session/MDGroup/Location/Continent", "continent");
    45         matchDocumentField(digester, "CMD/Components/Session/MDGroup/Location/Country", "country");
    46         matchDocumentField(digester, "CMD/Components/Session/MDGroup/Content/Content_Languages/Content_Language/Name", "language");
    47         matchDocumentField(digester, "CMD/Components/Session/MDGroup/Project/Contact/Organisation", "organisation");
    48         matchDocumentField(digester, "CMD/Components/Session/MDGroup/Content/Genre", "genre");
    49         matchDocumentField(digester, "CMD/Components/Session/MDGroup/Content/Subject", "subject");
    50         matchDocumentField(digester, "CMD/Components/Session/descriptions/Description", "description");
     45        Map<String, String> facetMap = facetMapping.getFacetMap();
     46        for (String facet : facetMap.keySet()) {
     47            matchDocumentField(digester, facetMap.get(facet), facet);
     48        }
    5149        return digester;
    5250    }
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java

    r759 r801  
    3030
    3131    private Set<String> processedIds = new HashSet<String>();
     32    private List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
     33    private final ImporterConfig config;
    3234
    33     /**
    34      * @param args
    35      * @throws MalformedURLException
    36      */
    37     public static void main(String[] args) throws MalformedURLException {
    38         BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "applicationContext.xml" });
    39         factory.getBean("configuration");
    40         MetadataImporter importer = new MetadataImporter();
    41         importer.startImport();
    42     }
     35    private int nrOFDocumentsUpdated;
     36    private int nrOfNonExistendResourceFiles = 0;
     37    private int nrOfFilesAnalyzed = 0;
     38    private int nrOfFilesWithoutId = 0;
    4339
    44     private List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
    45     private int nrOFDocumentsUpdated;
    46 
    47     public MetadataImporter() throws MalformedURLException {
     40    public MetadataImporter(ImporterConfig config) throws MalformedURLException {
     41        this.config = config;
    4842        String solrUrl = Configuration.getInstance().getSolrUrl();
    4943        LOG.info("Initializing Solr Server on " + solrUrl);
     
    6458        originRootMap.put("DoBeS archive", new File(
    6559                "/Users/patdui/data/data/corpora/qfs1/media-archive/dobes_data/Corpusstructure/dobes.imdi.cmdi"));
    66         originRootMap.put("ESF corpus", new File("/Users/patdui/data/data/corpora/esf_conv/Corpusstructure/esf.imdi.cmdi"));
    6760        originRootMap.put("ECHO", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/echo_data/Corpusstructure/echo.imdi.cmdi"));
    6861        originRootMap.put("DBD", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/dbd_data/Corpusstructure/dbd.imdi.cmdi"));
    69         originRootMap
    70                 .put("CGN corpus", new File("/Users/patdui/data/data/corpora/CGN/COREX6/data/meta/imdi_3.0_eaf/corpora/cgn.imdi.cmdi"));
    71         originRootMap.put("IFA corpus", new File("/Users/patdui/data/data/corpora/IFAcorpus/IMDI/IFAcorpus.imdi.cmdi"));
    7262        originRootMap.put("Sign Language", new File(
    7363                "/Users/patdui/data/data/corpora/qfs1/media-archive/Corpusstructure/sign_language.imdi.cmdi"));
     
    8474        originRootMap.put("Humanethologisches Filmarchiv", new File(
    8575                "/Users/patdui/data/data/corpora/qfs1/media-archive/humanethology/Corpusstructure/humanethology.imdi.cmdi"));
    86         originRootMap.put("OLAC Metadata Providers", new File(
    87                 "/Users/patdui/data/data/corpora/qfs1/media-archive/olac/OLAC/Corpusstructure/OLAC.imdi.cmdi")); //TODO PD OLAC is not there yet
    8876        originRootMap.put("SUCA", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/suca_data/Corpusstructure/suca.imdi.cmdi"));
    8977        originRootMap.put("Nijmegen corpora of casual speech", new File(
    9078                "/Users/patdui/data/data/corpora/qfs1/media-archive/casual_speech/Corpusstructure/casual_speech.imdi.cmdi"));
     79
     80        //TODO This file is already added in the above list originRootMap.put("ESF corpus", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/acqui_data/ac-ESF/Corpusstructure/esf.imdi.cmdi"));
     81//TODO PD these two do not exist in the dataset and ESF is different than what I find in the root cmdi file.
     82        //originRootMap.put("IFA corpus", new File("/Users/patdui/data/data/corpora/IFAcorpus/IMDI/IFAcorpus.imdi.cmdi"));
     83//        originRootMap
     84//        .put("CGN corpus", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/NCGN/Corpusstructure/cgn.imdi.cmdi"));
     85
     86        //        originRootMap.put("OLAC Metadata Providers", new File("/Users/patdui/data/olac/olac-cmdi-20101011/collection_root.cmdi"));
     87
     88       
     89        for (File file : originRootMap.values()) {
     90            if (!file.exists()) {
     91                LOG.error("Root file " + file + " does not exist. Probable configuration error so stopping import.");
     92                System.exit(1);
     93            }
     94        }
     95
    9196
    9297        // root file       File file = new File("/Users/patdui/data/data/corpora/qfs1/media-archive/Corpusstructure/MPI.imdi.cmdi");
     
    9499        try {
    95100            solrServer.deleteByQuery("*:*");//Delete the whole solr db.
    96             CMDIDigester digester = new CMDIDigester();
     101            CMDIDigester digester = new CMDIDigester(config.getFacetMapping());
    97102            for (String origin : originRootMap.keySet()) {
    98103                processCmdi(originRootMap.get(origin), origin, digester);
     
    116121        }
    117122        long took = (System.currentTimeMillis() - start) / 1000;
    118         LOG.info("Update of " + nrOFDocumentsUpdated + " took " + took + " secs.");
     123        LOG.info("Found " + nrOfNonExistendResourceFiles + " non existing resources files.");
     124        LOG.info("Found " + nrOfFilesWithoutId + " file(s) without an id.");
     125        LOG.info("Update of " + nrOFDocumentsUpdated + " took " + took + " secs. Total nr of files analyzed " + nrOfFilesAnalyzed);
    119126    }
    120127
    121128    private void processCmdi(File file, String origin, CMDIDigester digester) throws SolrServerException, IOException {
     129        nrOfFilesAnalyzed++;
    122130        CMDIData cmdiData = null;
    123131        try {
     
    139147                    processCmdi(resourceFile, origin, digester);
    140148                } else {
     149                    nrOfNonExistendResourceFiles++;
    141150                    LOG.error("Found nonexistent resource file (" + cmdiResource + ") in cmdi: " + file);
    142151                }
     
    149158            IOException {
    150159        if (cmdiData.getId() == null || cmdiData.getId().isEmpty()) {
     160            nrOfFilesWithoutId++;
    151161            LOG.info("Ignoring document without id, fileName: " + file);
    152162        } else {
     
    169179    }
    170180
     181    /**
     182     * @param args
     183     * @throws MalformedURLException
     184     */
     185    public static void main(String[] args) throws MalformedURLException {
     186        BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "applicationContext.xml", "importerConfig.xml" });
     187        factory.getBean("configuration");
     188        ImporterConfig config = (ImporterConfig) factory.getBean("importerConfig", ImporterConfig.class);
     189        MetadataImporter importer = new MetadataImporter(config);
     190        importer.startImport();
     191    }
     192
    171193}
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/pages/DocumentLinkPanel.java

    r770 r801  
    2121                pageParameters);
    2222        add(docLink);
    23         docLink.add(new Label("docLabel", doc.getFirstValue("name").toString()));
     23        Object nameValue = doc.getFirstValue("name");
     24        String name = "<no name>";
     25        if (nameValue != null) {
     26            name = nameValue.toString();
     27        }
     28        docLink.add(new Label("docLabel", name));
    2429    }
    2530
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/pages/FacetedSearchPage.java

    r770 r801  
    6969    @SuppressWarnings("serial")
    7070    private void addFacetColumns() {
    71         GridView<FacetField> facetColumns = new GridView<FacetField>("facetColumns", new SolrFacetDataProvider(query.getSolrQuery().getCopy())) {
     71        GridView<FacetField> facetColumns = new GridView<FacetField>("facetColumns", new SolrFacetDataProvider(query.getSolrQuery()
     72                .getCopy())) {
    7273            @Override
    7374            protected void populateItem(Item<FacetField> item) {
     
    9495            }
    9596        });
    96         searchResultList = new AjaxFallbackDefaultDataTable("searchResults", columns, new SolrDocumentDataProvider(query.getSolrQuery().getCopy()),
    97                 10);
     97        searchResultList = new AjaxFallbackDefaultDataTable("searchResults", columns, new SolrDocumentDataProvider(query.getSolrQuery()
     98                .getCopy()), 10);
    9899        add(searchResultList);
    99100    }
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/pages/SearchPageQuery.java

    r770 r801  
    88import org.apache.solr.client.solrj.response.FacetField;
    99import org.apache.solr.client.solrj.response.FacetField.Count;
     10import org.apache.solr.client.solrj.util.ClientUtils;
    1011import org.apache.solr.common.params.CommonParams;
    1112import org.apache.wicket.PageParameters;
     
    2526    public SearchPageQuery(PageParameters parameters) {
    2627        query = getDefaultQuery();
    27         query.setQuery(parameters.getString(CommonParams.Q, SOLR_SEARCH_ALL));
     28        String queryParam = parameters.getString(CommonParams.Q);
     29        setSearchQuery(queryParam);
     30        if (queryParam != null) {
     31            query.setQuery(escapeSolrQuery(queryParam));
     32        } else {
     33            query.setQuery(SOLR_SEARCH_ALL);
     34
     35        }
    2836        String[] filterQueries = parameters.getStringArray(CommonParams.FQ);
    29         query.setFilterQueries(filterQueries);
    30         init();
    31     }
    32 
    33     public SearchPageQuery(SolrQuery query) {
    34         this.query = query;
    35         init();
    36     }
    37 
    38     private void init() {
    39         searchQuery = query.getQuery();
    40         String[] filterQueries = query.getFilterQueries();
    4137        if (filterQueries != null) {
    42             for (String fq : filterQueries) {
    43                 String[] keyValue = fq.split(":");
     38            String[] encodedQueries = new String[filterQueries.length];
     39            for (int i = 0; i < filterQueries.length; i++) {
     40                String fq = filterQueries[i];
     41                String[] keyValue = fq.split(":", 2);
    4442                filterQueryMap.put(keyValue[0], keyValue[1]);
     43                encodedQueries[i] = keyValue[0] + ":" + ClientUtils.escapeQueryChars(keyValue[1]);
    4544            }
     45            query.setFilterQueries(encodedQueries);
    4646        }
    4747    }
    4848
     49    //    public static String escapeQueryChars(String s) { //TODO PD copied from solr, removed whitespace escaping; this fixes some of the issues I am having. Should I Solr-escape and then URL-escape?
     50    //        StringBuilder sb = new StringBuilder();
     51    //        for (int i = 0; i < s.length(); i++) {
     52    //          char c = s.charAt(i);
     53    //          // These characters are part of the query syntax and must be escaped
     54    //          if (c == '\\' || c == '+' || c == '-' || c == '!'  || c == '(' || c == ')' || c == ':'
     55    //            || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
     56    //            || c == '*' || c == '?' || c == '|' || c == '&'  || c == ';'
     57    //            ) {
     58    //            sb.append('\\');
     59    //          }
     60    //          sb.append(c);
     61    //        }
     62    //        return sb.toString();
     63    //      }
     64
     65    private String escapeSolrQuery(String value) {
     66        String result = null;
     67        if (value != null) {
     68            result = ClientUtils.escapeQueryChars(value);
     69        }
     70        return result;
     71    }
     72
     73    private SearchPageQuery(SearchPageQuery searchPageQuery) {
     74        this.query = searchPageQuery.query;
     75        this.filterQueryMap = new HashMap(searchPageQuery.filterQueryMap);
     76        this.searchQuery = searchPageQuery.searchQuery;
     77    }
     78
    4979    public SearchPageQuery getShallowCopy() {
    50         return new SearchPageQuery(query);
     80        return new SearchPageQuery(this);
    5181    }
    5282
     
    82112    }
    83113
     114    public Map<String, String> getFilterQueryMap() {
     115        return filterQueryMap;
     116    }
     117
    84118    public void setSearchQuery(String searchQuery) {
    85119        if (searchQuery == null || searchQuery.isEmpty()) {
    86120            searchQuery = SOLR_SEARCH_ALL;
    87121        }
    88         query.setQuery(searchQuery);
    89122        this.searchQuery = searchQuery;
    90123    }
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/pages/ShowAllFacetValuesPage.java

    r770 r801  
    22
    33import java.util.Iterator;
     4import java.util.Map;
    45
    56import org.apache.solr.client.solrj.response.FacetField.Count;
     
    1819        final SearchPageQuery query = new SearchPageQuery(parameters);
    1920
    20         String[] filterQueries = query.getSolrQuery().getFilterQueries();
     21        Map<String, String> filterQueries = query.getFilterQueryMap();
    2122        RepeatingView filteredFacets = new RepeatingView("filteredFacets");
    2223        if (filterQueries != null) {
     
    2425            wmc.add(new Label("filteredFacet", "Selected categories:"));
    2526            filteredFacets.add(wmc);
    26             for (String fq : filterQueries) {
     27            for (String fq : filterQueries.keySet()) {
    2728                wmc = new WebMarkupContainer(filteredFacets.newChildId());
    28                 wmc.add(new Label("filteredFacet", fq));
     29                wmc.add(new Label("filteredFacet", fq + " = " + filterQueries.get(fq)));
    2930                filteredFacets.add(wmc);
    3031            }
  • vlo/trunk/vlo_webapp/src/main/resources/applicationContext.xml

    r770 r801  
    44
    55  <bean id="configuration" class="eu.clarin.cmdi.vlo.Configuration" factory-method="getInstance">
    6     <property name="solrUrl" value="http://localhost:8080/vlo_solr" />
    7     <!--        <property name="solrUrl" value="http://lux16.mpi.nl:8080/vlo_solr" />-->
     6<!--    <property name="solrUrl" value="http://localhost:8080/vlo_solr" />-->
     7    <property name="solrUrl" value="http://lux16.mpi.nl:8080/vlo_solr" />
    88    <property name="IMDIBrowserUrl" value="http://corpus1.mpi.nl/ds/imdi_browser?openpath=" />
    99    <property name="facetFields">
  • vlo/trunk/vlo_webapp/src/main/webapp/WEB-INF/web.xml

    r719 r801  
    1212      <param-value>eu.clarin.cmdi.vlo.VloApplication</param-value>
    1313    </init-param>
     14    <init-param>
     15      <param-name>configuration</param-name>
     16<!--      <param-value>development</param-value>-->
     17       <param-value>deployment</param-value> 
     18      <!-- you can override with -Dwicket.configuration=development -->
     19    </init-param>
    1420  </filter>
    1521
  • vlo/trunk/vlo_webapp/src/main/webapp/css/main.css

    r719 r801  
    885885    color: #ffffff;
    886886}
     887.navigation .navigator {
     888    clear:both;
     889}
  • vlo/trunk/vlo_webapp/src/test/java/eu/clarin/cmdi/vlo/SearchPageQueryTest.java

    r770 r801  
    3333        params = new PageParameters();
    3434        params.add(CommonParams.Q, "test");
    35         params.add(CommonParams.FQ, "country:New\\ Zealand");
     35        params.add(CommonParams.FQ, "country:New Zealand");
    3636        q = new SearchPageQuery(params);
    3737        assertEquals("test", q.getSearchQuery());
     
    4343        assertEquals(1, q.getSolrQuery().getFilterQueries().length);
    4444        assertEquals("country:New\\ Zealand", q.getSolrQuery().getFilterQueries()[0]);
    45         assertEquals("New\\ Zealand", q.getSelectedValue(new FacetField("country")));
     45        assertEquals("New Zealand", q.getSelectedValue(new FacetField("country")));
    4646        assertNull(q.getSelectedValue(new FacetField("genre")));
    4747    }
  • vlo/trunk/vlo_webapp/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDigesterTest.java

    r750 r801  
    1414import org.junit.BeforeClass;
    1515import org.junit.Test;
     16import org.springframework.beans.factory.BeanFactory;
     17import org.springframework.context.support.ClassPathXmlApplicationContext;
    1618
    1719import eu.clarin.cmdi.vlo.importer.CMDIDigester;
     
    6668        content += "</CMD>\n";
    6769        File cmdiFile = createCmdiFile("testCorpus", content);
    68         CMDIDigester digester = new CMDIDigester();
     70        CMDIDigester digester = new CMDIDigester(getIMDIFacetMap());
    6971        CMDIData data = digester.process(cmdiFile);
    7072        assertEquals("test-hdl:1839/00-0000-0000-0000-0001-D", data.getId());
     
    362364        content += "</CMD>\n";
    363365        File cmdiFile = createCmdiFile("testSession", content);
    364         CMDIDigester digester = new CMDIDigester();
     366        CMDIDigester digester = new CMDIDigester(getIMDIFacetMap());
    365367        CMDIData data = digester.process(cmdiFile);
    366368        assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId());
     
    372374        assertEquals("kleve-route", doc.getFieldValue("name"));
    373375        assertEquals("Europe", doc.getFieldValue("continent"));
     376        assertEquals("English", doc.getFieldValue("language"));
    374377        assertEquals("Netherlands", doc.getFieldValue("country"));
    375378        assertEquals("Max Planck Institute for Psycholinguistics", doc.getFieldValue("organisation"));
     
    381384    }
    382385
     386    @Test
     387    public void testEmptyFieldsShouldBeNull() throws Exception {
     388        String content = "";
     389        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
     390        content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n";
     391        content += "     xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n";
     392        content += "   <Header>\n";
     393        content += "      <MdCreationDate>2008-05-27</MdCreationDate>\n";
     394        content += "      <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n";
     395        content += "      <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n";
     396        content += "   </Header>\n";
     397        content += "   <Resources>\n";
     398        content += "      <ResourceProxyList>\n";
     399        content += "      </ResourceProxyList>\n";
     400        content += "      <JournalFileProxyList/>\n";
     401        content += "      <ResourceRelationList/>\n";
     402        content += "   </Resources>\n";
     403        content += "   <Components>\n";
     404        content += "      <Session>\n";
     405        content += "         <Name>kleve-route</Name>\n";
     406        content += "         <Title>route description to Kleve</Title>\n";
     407        content += "         <Date>2002-10-30</Date>\n";
     408        content += "         <descriptions>\n";
     409        content += "            <Description LanguageId=\"ISO639-2:eng\">Test.</Description>\n";
     410        content += "         </descriptions>\n";
     411        content += "         <MDGroup>\n";
     412        content += "            <Location>\n";
     413        content += "               <Continent>Europe</Continent>\n";
     414        content += "               <Country>Netherlands</Country>\n";
     415        content += "               <Region/>\n";
     416        content += "               <Address>Wundtlaan 1, Nijmegen</Address>\n";
     417        content += "            </Location>\n";
     418        content += "            <Project>\n";
     419        content += "               <Name>Peter Wittenburg</Name>\n";
     420        content += "               <Title>Route description test resource</Title>\n";
     421        content += "               <Id/>\n";
     422        content += "               <Contact>\n";
     423        content += "                  <Name></Name>\n";
     424        content += "                  <Address></Address>\n";
     425        content += "                  <Email></Email>\n";
     426        content += "                  <Organisation></Organisation>\n";
     427        content += "               </Contact>\n";
     428        content += "               <descriptions>\n";
     429        content += "                  <Description LanguageId=\"\"/>\n";
     430        content += "               </descriptions>\n";
     431        content += "            </Project>\n";
     432        content += "            <Keys>\n";
     433        content += "            </Keys>\n";
     434        content += "            <Content>\n";
     435        content += "               <Genre>Unspecified</Genre>\n";
     436        content += "               <SubGenre>Unspecified</SubGenre>\n";
     437        content += "               <Task>route description</Task>\n";
     438        content += "               <Modalities>Speech; Gestures</Modalities>\n";
     439        content += "               <CommunicationContext>\n";
     440        content += "               </CommunicationContext>\n";
     441        content += "               <Content_Languages>\n";
     442        content += "               </Content_Languages>\n";
     443        content += "               <descriptions>\n";
     444        content += "               </descriptions>\n";
     445        content += "            </Content>\n";
     446        content += "            <Actors>\n";
     447        content += "            </Actors>\n";
     448        content += "         </MDGroup>\n";
     449        content += "         <Resources>\n";
     450        content += "         </Resources>\n";
     451        content += "      </Session>\n";
     452        content += "   </Components>\n";
     453        content += "</CMD>\n";
     454        File cmdiFile = createCmdiFile("testSession", content);
     455        CMDIDigester digester = new CMDIDigester(getIMDIFacetMap());
     456        CMDIData data = digester.process(cmdiFile);
     457        assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId());
     458        List<String> resources = data.getResources();
     459        assertEquals(0, resources.size());
     460        SolrInputDocument doc = data.getSolrDocument();
     461        assertNotNull(doc);
     462        assertEquals(5, doc.getFieldNames().size());
     463        assertEquals("kleve-route", doc.getFieldValue("name"));
     464        assertEquals("Europe", doc.getFieldValue("continent"));
     465        assertEquals("Netherlands", doc.getFieldValue("country"));
     466        assertEquals("Unspecified", doc.getFieldValue("genre"));
     467        assertEquals("Test.", doc.getFieldValue("description"));
     468        assertEquals("Should be null not empty string", null, doc.getFieldValue("organisation"));
     469        assertEquals(null, doc.getFieldValue("language"));
     470        assertEquals(null, doc.getFieldValue("subject"));
     471    }
     472
     473    @Test
     474    public void testOlac() throws Exception {
     475        String content = "";
     476        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
     477        content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n";
     478        content += "     xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n";
     479        content += "     xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\"\n";
     480        content += "     xmlns:defns=\"http://www.openarchives.org/OAI/2.0/\"\n";
     481        content += "     xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438236/xsd\">\n";
     482        content += "   <Header>\n";
     483        content += "      <MdCreator>olac2cmdi.xsl</MdCreator>\n";
     484        content += "      <MdCreationDate>2002-12-14</MdCreationDate>\n";
     485        content += "      <MdSelfLink>oai:ailla.utexas.edu:1</MdSelfLink>\n";
     486        content += "      <MdProfile>clarin.eu:cr1:p_1271859438236</MdProfile>\n";
     487        content += "   </Header>\n";
     488        content += "   <Resources>\n";
     489        content += "      <ResourceProxyList/>\n";
     490        content += "      <JournalFileProxyList/>\n";
     491        content += "      <ResourceRelationList/>\n";
     492        content += "   </Resources>\n";
     493        content += "   <Components>\n";
     494        content += "      <olac>\n";
     495        content += "         <olac-creator>Joel Sherzer (recorder)</olac-creator>\n";
     496        content += "         <olac-description>\n";
     497        content += "    Channel: Talking;\n";
     498        content += "    Genre: Traditional Narrative / Story;\n";
     499        content += "    Country: Panama;\n";
     500        content += "    Place of Recording: Mulatuppu;\n";
     501        content += "    Event: Community Gathering;\n";
     502        content += "    Institutional Affiliation: University of Texas at Austin;\n";
     503        content += "    Participant Information: Political Leader;\n";
     504        content += "      </olac-description>\n";
     505        content += "         <olac-description>The one-eyed grandmother is one of many traditional Kuna stories performed in the Kuna gathering house. This story, performed here by Pedro Arias, combines European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more Kuna in origin. All are woven together and a moral is provided. Pedro Arias performed this story before a gathered audience in the morning..\n";
     506        content += "      </olac-description>\n";
     507        content += "         <olac-identifier>http://uts.cc.utexas.edu/~ailla/audio/sherzer/one_eyed_grandmother.ram</olac-identifier>\n";
     508        content += "         <olac-identifier>http://uts.cc.utexas.edu/~ailla/texts/sherzer/one_eyed_grandmother.pdf</olac-identifier>\n";
     509        content += "         <olac-language/>\n";
     510        content += "         <olac-subject>Kuna</olac-subject>\n";
     511        content += "         <type>Transcription</type>\n";
     512        content += "      </olac>\n";
     513        content += "   </Components>\n";
     514        content += "</CMD>\n";
     515
     516        File cmdiFile = createCmdiFile("testOlac", content);
     517        CMDIDigester digester = new CMDIDigester(getOlacFacetMap());
     518        CMDIData data = digester.process(cmdiFile);
     519        assertEquals("oai:ailla.utexas.edu:1", data.getId());
     520        List<String> resources = data.getResources();
     521        assertEquals(0, resources.size());
     522        SolrInputDocument doc = data.getSolrDocument();
     523        assertNotNull(doc);
     524        assertEquals(3, doc.getFieldNames().size());
     525        assertEquals(null, doc.getFieldValue("name"));
     526        assertEquals(null, doc.getFieldValue("continent"));
     527        assertEquals(null, doc.getFieldValue("language"));
     528        assertEquals(null, doc.getFieldValue("country"));
     529        assertEquals(null, doc.getFieldValue("organisation"));
     530        assertEquals("Transcription", doc.getFieldValue("genre"));
     531        assertEquals("Kuna", doc.getFieldValue("subject"));
     532        assertEquals(2, doc.getFieldValues("description").size());
     533    }
     534
     535    @Test
     536    public void testOlacCollection() throws Exception {
     537        String content = "";
     538        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
     539        content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n";
     540        content += "    xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438236/xsd\">\n";
     541        content += "    <Header>\n";
     542        content += "        <MdCreator>dir2cmdicollection.py</MdCreator>\n";
     543        content += "        <MdCreationDate>2010-10-11</MdCreationDate>\n";
     544        content += "        <MdSelfLink>collection_ATILF_Resources.cmdi</MdSelfLink>\n";
     545        content += "        <MdProfile>clarin.eu:cr1:p_1271859438236</MdProfile>\n";
     546        content += "    </Header>\n";
     547        content += "    <Resources>\n";
     548        content += "        <ResourceProxyList>\n";
     549        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0001.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0001.xml.cmdi</ResourceRef></ResourceProxy>\n";
     550        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0002.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0002.xml.cmdi</ResourceRef></ResourceProxy>\n";
     551        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0003.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0003.xml.cmdi</ResourceRef></ResourceProxy>\n";
     552        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0004.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0004.xml.cmdi</ResourceRef></ResourceProxy>\n";
     553        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_a.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_a.xml.cmdi</ResourceRef></ResourceProxy>\n";
     554        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_b.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_b.xml.cmdi</ResourceRef></ResourceProxy>\n";
     555        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0006.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0006.xml.cmdi</ResourceRef></ResourceProxy>\n";
     556        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M277.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M277.xml.cmdi</ResourceRef></ResourceProxy>\n";
     557        content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M592.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M592.xml.cmdi</ResourceRef></ResourceProxy>\n";
     558        content += "        </ResourceProxyList>\n";
     559        content += "        <JournalFileProxyList/>\n";
     560        content += "        <ResourceRelationList/>\n";
     561        content += "    </Resources>\n";
     562        content += "    <Components>\n";
     563        content += "        <olac></olac>\n";
     564        content += "    </Components>\n";
     565        content += "</CMD>\n";
     566
     567        File cmdiFile = createCmdiFile("testOlac", content);
     568        CMDIDigester digester = new CMDIDigester(getOlacFacetMap());
     569        CMDIData data = digester.process(cmdiFile);
     570        assertEquals("collection_ATILF_Resources.cmdi", data.getId());
     571        List<String> resources = data.getResources();
     572        assertEquals(9, resources.size());
     573        SolrInputDocument doc = data.getSolrDocument();
     574        assertNull(doc);
     575
     576    }
     577
     578    private FacetMapping getOlacFacetMap() {
     579        BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "importerConfig.xml" }); //TODO PD doesn't work on the command line.
     580        FacetMapping facetMapping = (FacetMapping) factory.getBean("olacMapping");
     581        return facetMapping;
     582    }
     583
     584    private FacetMapping getIMDIFacetMap() {
     585        BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "importerConfig.xml" });
     586        FacetMapping facetMapping = (FacetMapping) factory.getBean("imdiMapping");
     587        return facetMapping;
     588    }
     589
    383590    private File createCmdiFile(String name, String content) throws IOException {
    384591        File file = File.createTempFile(name, "cmdi", testDir);
Note: See TracChangeset for help on using the changeset viewer.