Changeset 801
- Timestamp:
- 10/18/10 08:28:46 (14 years ago)
- Location:
- vlo/trunk/vlo_webapp
- Files:
-
- 3 added
- 15 edited
Legend:
- Unmodified
- Added
- Removed
-
vlo/trunk/vlo_webapp/pom.xml
r758 r801 70 70 </dependency> 71 71 <dependency> 72 73 74 72 <groupId>org.springframework</groupId> 73 <artifactId>spring-context</artifactId> 74 <version>2.5.6</version> 75 75 </dependency> 76 76 … … 92 92 <filtering>true</filtering> 93 93 <directory>src/main/resources</directory> 94 <excludes> 95 <exclude>importerConfig.xml</exclude> <!-- Not needed for normal build used in Importer and tests --> 96 </excludes> 94 97 </resource> 95 98 <resource> … … 116 119 </plugin> 117 120 <!-- 118 Assembly helper to create the Importer tool a command line tool which can help fill in a solr server with 119 cmdi files. Build the tool with: mvn -Dpackaging=jar -Dmaven.test.skip=true assembly:assembly 120 This creates a in "target" a vlo_solr-1.0-SNAPSHOT-Importer directory with a bin and share 121 directory. You can start the tool from the bin dir. 121 Assembly helper to create the Importer tool a command line tool which can help fill in a solr server with cmdi 122 files. Build the tool with: mvn -Dpackaging=jar -Dmaven.test.skip=true assembly:assembly This creates a in 123 "target" a vlo_solr-1.0-SNAPSHOT-Importer directory with a bin and share directory. You can start the tool from 124 the bin dir. 122 125 --> 123 126 <plugin> … … 131 134 </descriptors> 132 135 </configuration> 133 </plugin> 136 </plugin> 134 137 </plugins> 135 138 </build> -
vlo/trunk/vlo_webapp/src/main/assembly/importer.xml
r758 r801 1 1 <?xml version="1.0" encoding="UTF-8"?> 2 2 <assembly> 3 <id>importer</id> 4 <formats> 5 <format>dir</format> 6 </formats> 7 <includeBaseDirectory>false</includeBaseDirectory> 8 <dependencySets> 9 <dependencySet> 10 <useDefaultExcludes/> 11 <outputDirectory>/share</outputDirectory> 12 </dependencySet> 13 </dependencySets> 14 <fileSets> 15 <fileSet> 16 <directory>src/main/bin</directory> 17 <includes> 18 <include>log4j.properties</include> <!-- Copy log4j to the bin dir to make it a bit easier to change values in it --> 19 </includes> 20 <outputDirectory>bin</outputDirectory> 21 </fileSet> 22 <fileSet> 23 <directory>src/main/bin</directory> 24 <includes> 25 <include>*.sh</include> 26 </includes> 27 <outputDirectory>bin</outputDirectory> 28 <fileMode>0755</fileMode> 29 </fileSet> 30 </fileSets> 3 <id>importer</id> 4 <formats> 5 <format>dir</format> 6 </formats> 7 <includeBaseDirectory>false</includeBaseDirectory> 8 <dependencySets> 9 <dependencySet> 10 <useDefaultExcludes /> 11 <outputDirectory>/share</outputDirectory> 12 </dependencySet> 13 </dependencySets> 14 <fileSets> 15 <fileSet> 16 <directory>src/main/bin</directory> 17 <includes> 18 <include>log4j.properties</include> <!-- Copy log4j to the bin dir to make it a bit easier to change values in it --> 19 </includes> 20 <outputDirectory>bin</outputDirectory> 21 </fileSet> 22 <fileSet> 23 <directory>src/main/bin</directory> 24 <includes> 25 <include>*.sh</include> 26 </includes> 27 <outputDirectory>bin</outputDirectory> 28 <fileMode>0755</fileMode> 29 </fileSet> 30 <fileSet> 31 <directory>src/main/resources</directory> 32 <includes> 33 <include>importerConfig.xml</include> 34 </includes> 35 <outputDirectory>bin</outputDirectory> 36 </fileSet> 37 </fileSets> 31 38 </assembly> -
vlo/trunk/vlo_webapp/src/main/bin/vlo_solr_importer.sh
r758 r801 2 2 3 3 LIB=../share 4 CLASSPATH=.:./log4j.properties 4 CLASSPATH=.:./log4j.properties:./importerConfig.xml 5 5 JAVA=java 6 6 -
vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIData.java
r750 r801 21 21 doc = new SolrInputDocument(); 22 22 } 23 doc.addField(name, value); 23 if (value != null && !value.isEmpty()) { 24 doc.addField(name, value); 25 } 24 26 } 25 27 -
vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIDigester.java
r750 r801 4 4 import java.io.FileInputStream; 5 5 import java.io.IOException; 6 import java.util.Map; 6 7 7 8 import org.apache.commons.digester.Digester; … … 11 12 import org.xml.sax.helpers.XMLReaderFactory; 12 13 13 14 14 public class CMDIDigester { 15 15 16 16 private Digester digester; 17 private final FacetMapping facetMapping; 17 18 18 public CMDIDigester() { 19 public CMDIDigester(FacetMapping facetMapping) { 20 this.facetMapping = facetMapping; 19 21 try { 20 22 digester = createDigester(); … … 37 39 38 40 digester.addObjectCreate("CMD", CMDIData.class); 39 digester.addBeanPropertySetter( "CMD/Header/MdSelfLink", "id");41 digester.addBeanPropertySetter(facetMapping.getIdMapping(), "id"); 40 42 digester.addCallMethod("CMD/Resources/ResourceProxyList/ResourceProxy/", "addResource", 2); 41 43 digester.addCallParam("CMD/Resources/ResourceProxyList/ResourceProxy/ResourceRef", 0); 42 44 digester.addCallParam("CMD/Resources/ResourceProxyList/ResourceProxy/ResourceType", 1); 43 matchDocumentField(digester, "CMD/Components/Session/Name", "name"); 44 matchDocumentField(digester, "CMD/Components/Session/MDGroup/Location/Continent", "continent"); 45 matchDocumentField(digester, "CMD/Components/Session/MDGroup/Location/Country", "country"); 46 matchDocumentField(digester, "CMD/Components/Session/MDGroup/Content/Content_Languages/Content_Language/Name", "language"); 47 matchDocumentField(digester, "CMD/Components/Session/MDGroup/Project/Contact/Organisation", "organisation"); 48 matchDocumentField(digester, "CMD/Components/Session/MDGroup/Content/Genre", "genre"); 49 matchDocumentField(digester, "CMD/Components/Session/MDGroup/Content/Subject", "subject"); 50 matchDocumentField(digester, "CMD/Components/Session/descriptions/Description", "description"); 45 Map<String, String> facetMap = facetMapping.getFacetMap(); 46 for (String facet : facetMap.keySet()) { 47 matchDocumentField(digester, facetMap.get(facet), facet); 48 } 51 49 return digester; 52 50 } -
vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java
r759 r801 30 30 31 31 private Set<String> processedIds = new HashSet<String>(); 32 private List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(); 33 private final ImporterConfig config; 32 34 33 /** 34 * @param args 35 * @throws MalformedURLException 36 */ 37 public static void main(String[] args) throws MalformedURLException { 38 BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "applicationContext.xml" }); 39 factory.getBean("configuration"); 40 MetadataImporter importer = new MetadataImporter(); 41 importer.startImport(); 42 } 35 private int nrOFDocumentsUpdated; 36 private int nrOfNonExistendResourceFiles = 0; 37 private int nrOfFilesAnalyzed = 0; 38 private int nrOfFilesWithoutId = 0; 43 39 44 private List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(); 45 private int nrOFDocumentsUpdated; 46 47 public MetadataImporter() throws MalformedURLException { 40 public MetadataImporter(ImporterConfig config) throws MalformedURLException { 41 this.config = config; 48 42 String solrUrl = Configuration.getInstance().getSolrUrl(); 49 43 LOG.info("Initializing Solr Server on " + solrUrl); … … 64 58 originRootMap.put("DoBeS archive", new File( 65 59 "/Users/patdui/data/data/corpora/qfs1/media-archive/dobes_data/Corpusstructure/dobes.imdi.cmdi")); 66 originRootMap.put("ESF corpus", new File("/Users/patdui/data/data/corpora/esf_conv/Corpusstructure/esf.imdi.cmdi"));67 60 originRootMap.put("ECHO", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/echo_data/Corpusstructure/echo.imdi.cmdi")); 68 61 originRootMap.put("DBD", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/dbd_data/Corpusstructure/dbd.imdi.cmdi")); 69 originRootMap70 .put("CGN corpus", new File("/Users/patdui/data/data/corpora/CGN/COREX6/data/meta/imdi_3.0_eaf/corpora/cgn.imdi.cmdi"));71 originRootMap.put("IFA corpus", new File("/Users/patdui/data/data/corpora/IFAcorpus/IMDI/IFAcorpus.imdi.cmdi"));72 62 originRootMap.put("Sign Language", new 
File( 73 63 "/Users/patdui/data/data/corpora/qfs1/media-archive/Corpusstructure/sign_language.imdi.cmdi")); … … 84 74 originRootMap.put("Humanethologisches Filmarchiv", new File( 85 75 "/Users/patdui/data/data/corpora/qfs1/media-archive/humanethology/Corpusstructure/humanethology.imdi.cmdi")); 86 originRootMap.put("OLAC Metadata Providers", new File(87 "/Users/patdui/data/data/corpora/qfs1/media-archive/olac/OLAC/Corpusstructure/OLAC.imdi.cmdi")); //TODO PD OLAC is not there yet88 76 originRootMap.put("SUCA", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/suca_data/Corpusstructure/suca.imdi.cmdi")); 89 77 originRootMap.put("Nijmegen corpora of casual speech", new File( 90 78 "/Users/patdui/data/data/corpora/qfs1/media-archive/casual_speech/Corpusstructure/casual_speech.imdi.cmdi")); 79 80 //TODO This file is already added in the above list originRootMap.put("ESF corpus", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/acqui_data/ac-ESF/Corpusstructure/esf.imdi.cmdi")); 81 //TODO PD these two do not exist in the dataset and ESF is different then what I find in the root cmdi file. 82 //originRootMap.put("IFA corpus", new File("/Users/patdui/data/data/corpora/IFAcorpus/IMDI/IFAcorpus.imdi.cmdi")); 83 // originRootMap 84 // .put("CGN corpus", new File("/Users/patdui/data/data/corpora/qfs1/media-archive/NCGN/Corpusstructure/cgn.imdi.cmdi")); 85 86 // originRootMap.put("OLAC Metadata Providers", new File("/Users/patdui/data/olac/olac-cmdi-20101011/collection_root.cmdi")); 87 88 89 for (File file : originRootMap.values()) { 90 if (!file.exists()) { 91 LOG.error("Root file " + file + " does not exist. Probable configuration error so stopping import."); 92 System.exit(1); 93 } 94 } 95 91 96 92 97 // root file File file = new File("/Users/patdui/data/data/corpora/qfs1/media-archive/Corpusstructure/MPI.imdi.cmdi"); … … 94 99 try { 95 100 solrServer.deleteByQuery("*:*");//Delete the whole solr db. 
96 CMDIDigester digester = new CMDIDigester( );101 CMDIDigester digester = new CMDIDigester(config.getFacetMapping()); 97 102 for (String origin : originRootMap.keySet()) { 98 103 processCmdi(originRootMap.get(origin), origin, digester); … … 116 121 } 117 122 long took = (System.currentTimeMillis() - start) / 1000; 118 LOG.info("Update of " + nrOFDocumentsUpdated + " took " + took + " secs."); 123 LOG.info("Found " + nrOfNonExistendResourceFiles + " non existing resources files."); 124 LOG.info("Found " + nrOfFilesWithoutId + " file(s) without an id."); 125 LOG.info("Update of " + nrOFDocumentsUpdated + " took " + took + " secs. Total nr of files analyzed " + nrOfFilesAnalyzed); 119 126 } 120 127 121 128 private void processCmdi(File file, String origin, CMDIDigester digester) throws SolrServerException, IOException { 129 nrOfFilesAnalyzed++; 122 130 CMDIData cmdiData = null; 123 131 try { … … 139 147 processCmdi(resourceFile, origin, digester); 140 148 } else { 149 nrOfNonExistendResourceFiles++; 141 150 LOG.error("Found nonexistent resource file (" + cmdiResource + ") in cmdi: " + file); 142 151 } … … 149 158 IOException { 150 159 if (cmdiData.getId() == null || cmdiData.getId().isEmpty()) { 160 nrOfFilesWithoutId++; 151 161 LOG.info("Ignoring document without id, fileName: " + file); 152 162 } else { … … 169 179 } 170 180 181 /** 182 * @param args 183 * @throws MalformedURLException 184 */ 185 public static void main(String[] args) throws MalformedURLException { 186 BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "applicationContext.xml", "importerConfig.xml" }); 187 factory.getBean("configuration"); 188 ImporterConfig config = (ImporterConfig) factory.getBean("importerConfig", ImporterConfig.class); 189 MetadataImporter importer = new MetadataImporter(config); 190 importer.startImport(); 191 } 192 171 193 } -
vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/pages/DocumentLinkPanel.java
r770 r801 21 21 pageParameters); 22 22 add(docLink); 23 docLink.add(new Label("docLabel", doc.getFirstValue("name").toString())); 23 Object nameValue = doc.getFirstValue("name"); 24 String name = "<no name>"; 25 if (nameValue != null) { 26 name = nameValue.toString(); 27 } 28 docLink.add(new Label("docLabel", name)); 24 29 } 25 30 -
vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/pages/FacetedSearchPage.java
r770 r801 69 69 @SuppressWarnings("serial") 70 70 private void addFacetColumns() { 71 GridView<FacetField> facetColumns = new GridView<FacetField>("facetColumns", new SolrFacetDataProvider(query.getSolrQuery().getCopy())) { 71 GridView<FacetField> facetColumns = new GridView<FacetField>("facetColumns", new SolrFacetDataProvider(query.getSolrQuery() 72 .getCopy())) { 72 73 @Override 73 74 protected void populateItem(Item<FacetField> item) { … … 94 95 } 95 96 }); 96 searchResultList = new AjaxFallbackDefaultDataTable("searchResults", columns, new SolrDocumentDataProvider(query.getSolrQuery() .getCopy()), 97 10); 97 searchResultList = new AjaxFallbackDefaultDataTable("searchResults", columns, new SolrDocumentDataProvider(query.getSolrQuery() 98 .getCopy()), 10); 98 99 add(searchResultList); 99 100 } -
vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/pages/SearchPageQuery.java
r770 r801 8 8 import org.apache.solr.client.solrj.response.FacetField; 9 9 import org.apache.solr.client.solrj.response.FacetField.Count; 10 import org.apache.solr.client.solrj.util.ClientUtils; 10 11 import org.apache.solr.common.params.CommonParams; 11 12 import org.apache.wicket.PageParameters; … … 25 26 public SearchPageQuery(PageParameters parameters) { 26 27 query = getDefaultQuery(); 27 query.setQuery(parameters.getString(CommonParams.Q, SOLR_SEARCH_ALL)); 28 String queryParam = parameters.getString(CommonParams.Q); 29 setSearchQuery(queryParam); 30 if (queryParam != null) { 31 query.setQuery(escapeSolrQuery(queryParam)); 32 } else { 33 query.setQuery(SOLR_SEARCH_ALL); 34 35 } 28 36 String[] filterQueries = parameters.getStringArray(CommonParams.FQ); 29 query.setFilterQueries(filterQueries);30 init();31 }32 33 public SearchPageQuery(SolrQuery query) {34 this.query = query;35 init();36 }37 38 private void init() {39 searchQuery = query.getQuery();40 String[] filterQueries = query.getFilterQueries();41 37 if (filterQueries != null) { 42 for (String fq : filterQueries) { 43 String[] keyValue = fq.split(":"); 38 String[] encodedQueries = new String[filterQueries.length]; 39 for (int i = 0; i < filterQueries.length; i++) { 40 String fq = filterQueries[i]; 41 String[] keyValue = fq.split(":", 2); 44 42 filterQueryMap.put(keyValue[0], keyValue[1]); 43 encodedQueries[i] = keyValue[0] + ":" + ClientUtils.escapeQueryChars(keyValue[1]); 45 44 } 45 query.setFilterQueries(encodedQueries); 46 46 } 47 47 } 48 48 49 // public static String escapeQueryChars(String s) { //TODO PD copied from solr, removed whitespace escaping this fixes some of the issues I am having. Moet ik solr escapen en dan url escapen? 50 // StringBuilder sb = new StringBuilder(); 51 // for (int i = 0; i < s.length(); i++) { 52 // char c = s.charAt(i); 53 // // These characters are part of the query syntax and must be escaped 54 // if (c == '\\' || c == '+' || c == '-' || c == '!' 
|| c == '(' || c == ')' || c == ':' 55 // || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' 56 // || c == '*' || c == '?' || c == '|' || c == '&' || c == ';' 57 // ) { 58 // sb.append('\\'); 59 // } 60 // sb.append(c); 61 // } 62 // return sb.toString(); 63 // } 64 65 private String escapeSolrQuery(String value) { 66 String result = null; 67 if (value != null) { 68 result = ClientUtils.escapeQueryChars(value); 69 } 70 return result; 71 } 72 73 private SearchPageQuery(SearchPageQuery searchPageQuery) { 74 this.query = searchPageQuery.query; 75 this.filterQueryMap = new HashMap(searchPageQuery.filterQueryMap); 76 this.searchQuery = searchPageQuery.searchQuery; 77 } 78 49 79 public SearchPageQuery getShallowCopy() { 50 return new SearchPageQuery( query);80 return new SearchPageQuery(this); 51 81 } 52 82 … … 82 112 } 83 113 114 public Map<String, String> getFilterQueryMap() { 115 return filterQueryMap; 116 } 117 84 118 public void setSearchQuery(String searchQuery) { 85 119 if (searchQuery == null || searchQuery.isEmpty()) { 86 120 searchQuery = SOLR_SEARCH_ALL; 87 121 } 88 query.setQuery(searchQuery);89 122 this.searchQuery = searchQuery; 90 123 } -
vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/pages/ShowAllFacetValuesPage.java
r770 r801 2 2 3 3 import java.util.Iterator; 4 import java.util.Map; 4 5 5 6 import org.apache.solr.client.solrj.response.FacetField.Count; … … 18 19 final SearchPageQuery query = new SearchPageQuery(parameters); 19 20 20 String[] filterQueries = query.getSolrQuery().getFilterQueries();21 Map<String, String> filterQueries = query.getFilterQueryMap(); 21 22 RepeatingView filteredFacets = new RepeatingView("filteredFacets"); 22 23 if (filterQueries != null) { … … 24 25 wmc.add(new Label("filteredFacet", "Selected categories:")); 25 26 filteredFacets.add(wmc); 26 for (String fq : filterQueries ) {27 for (String fq : filterQueries.keySet()) { 27 28 wmc = new WebMarkupContainer(filteredFacets.newChildId()); 28 wmc.add(new Label("filteredFacet", fq ));29 wmc.add(new Label("filteredFacet", fq + " = " + filterQueries.get(fq))); 29 30 filteredFacets.add(wmc); 30 31 } -
vlo/trunk/vlo_webapp/src/main/resources/applicationContext.xml
r770 r801 4 4 5 5 <bean id="configuration" class="eu.clarin.cmdi.vlo.Configuration" factory-method="getInstance"> 6 <property name="solrUrl" value="http://localhost:8080/vlo_solr" /> 7 <!-- <property name="solrUrl" value="http://lux16.mpi.nl:8080/vlo_solr" />--> 6 <!-- <property name="solrUrl" value="http://localhost:8080/vlo_solr" />--> 7 <property name="solrUrl" value="http://lux16.mpi.nl:8080/vlo_solr" /> 8 8 <property name="IMDIBrowserUrl" value="http://corpus1.mpi.nl/ds/imdi_browser?openpath=" /> 9 9 <property name="facetFields"> -
vlo/trunk/vlo_webapp/src/main/webapp/WEB-INF/web.xml
r719 r801 12 12 <param-value>eu.clarin.cmdi.vlo.VloApplication</param-value> 13 13 </init-param> 14 <init-param> 15 <param-name>configuration</param-name> 16 <!-- <param-value>development</param-value>--> 17 <param-value>deployment</param-value> 18 <!-- you can override with -Dwicket.configuration=development --> 19 </init-param> 14 20 </filter> 15 21 -
vlo/trunk/vlo_webapp/src/main/webapp/css/main.css
r719 r801 885 885 color: #ffffff; 886 886 } 887 .navigation .navigator { 888 clear:both; 889 } -
vlo/trunk/vlo_webapp/src/test/java/eu/clarin/cmdi/vlo/SearchPageQueryTest.java
r770 r801 33 33 params = new PageParameters(); 34 34 params.add(CommonParams.Q, "test"); 35 params.add(CommonParams.FQ, "country:New \\Zealand");35 params.add(CommonParams.FQ, "country:New Zealand"); 36 36 q = new SearchPageQuery(params); 37 37 assertEquals("test", q.getSearchQuery()); … … 43 43 assertEquals(1, q.getSolrQuery().getFilterQueries().length); 44 44 assertEquals("country:New\\ Zealand", q.getSolrQuery().getFilterQueries()[0]); 45 assertEquals("New \\Zealand", q.getSelectedValue(new FacetField("country")));45 assertEquals("New Zealand", q.getSelectedValue(new FacetField("country"))); 46 46 assertNull(q.getSelectedValue(new FacetField("genre"))); 47 47 } -
vlo/trunk/vlo_webapp/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDigesterTest.java
r750 r801 14 14 import org.junit.BeforeClass; 15 15 import org.junit.Test; 16 import org.springframework.beans.factory.BeanFactory; 17 import org.springframework.context.support.ClassPathXmlApplicationContext; 16 18 17 19 import eu.clarin.cmdi.vlo.importer.CMDIDigester; … … 66 68 content += "</CMD>\n"; 67 69 File cmdiFile = createCmdiFile("testCorpus", content); 68 CMDIDigester digester = new CMDIDigester( );70 CMDIDigester digester = new CMDIDigester(getIMDIFacetMap()); 69 71 CMDIData data = digester.process(cmdiFile); 70 72 assertEquals("test-hdl:1839/00-0000-0000-0000-0001-D", data.getId()); … … 362 364 content += "</CMD>\n"; 363 365 File cmdiFile = createCmdiFile("testSession", content); 364 CMDIDigester digester = new CMDIDigester( );366 CMDIDigester digester = new CMDIDigester(getIMDIFacetMap()); 365 367 CMDIData data = digester.process(cmdiFile); 366 368 assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId()); … … 372 374 assertEquals("kleve-route", doc.getFieldValue("name")); 373 375 assertEquals("Europe", doc.getFieldValue("continent")); 376 assertEquals("English", doc.getFieldValue("language")); 374 377 assertEquals("Netherlands", doc.getFieldValue("country")); 375 378 assertEquals("Max Planck Institute for Psycholinguistics", doc.getFieldValue("organisation")); … … 381 384 } 382 385 386 @Test 387 public void testEmptyFieldsShouldBeNull() throws Exception { 388 String content = ""; 389 content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; 390 content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; 391 content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd\">\n"; 392 content += " <Header>\n"; 393 content += " <MdCreationDate>2008-05-27</MdCreationDate>\n"; 394 content += " <MdSelfLink>test-hdl:1839/00-0000-0000-0009-294C-9</MdSelfLink>\n"; 395 content += " <MdProfile>clarin.eu:cr1:p_1271859438204</MdProfile>\n"; 
396 content += " </Header>\n"; 397 content += " <Resources>\n"; 398 content += " <ResourceProxyList>\n"; 399 content += " </ResourceProxyList>\n"; 400 content += " <JournalFileProxyList/>\n"; 401 content += " <ResourceRelationList/>\n"; 402 content += " </Resources>\n"; 403 content += " <Components>\n"; 404 content += " <Session>\n"; 405 content += " <Name>kleve-route</Name>\n"; 406 content += " <Title>route description to Kleve</Title>\n"; 407 content += " <Date>2002-10-30</Date>\n"; 408 content += " <descriptions>\n"; 409 content += " <Description LanguageId=\"ISO639-2:eng\">Test.</Description>\n"; 410 content += " </descriptions>\n"; 411 content += " <MDGroup>\n"; 412 content += " <Location>\n"; 413 content += " <Continent>Europe</Continent>\n"; 414 content += " <Country>Netherlands</Country>\n"; 415 content += " <Region/>\n"; 416 content += " <Address>Wundtlaan 1, Nijmegen</Address>\n"; 417 content += " </Location>\n"; 418 content += " <Project>\n"; 419 content += " <Name>Peter Wittenburg</Name>\n"; 420 content += " <Title>Route description test resource</Title>\n"; 421 content += " <Id/>\n"; 422 content += " <Contact>\n"; 423 content += " <Name></Name>\n"; 424 content += " <Address></Address>\n"; 425 content += " <Email></Email>\n"; 426 content += " <Organisation></Organisation>\n"; 427 content += " </Contact>\n"; 428 content += " <descriptions>\n"; 429 content += " <Description LanguageId=\"\"/>\n"; 430 content += " </descriptions>\n"; 431 content += " </Project>\n"; 432 content += " <Keys>\n"; 433 content += " </Keys>\n"; 434 content += " <Content>\n"; 435 content += " <Genre>Unspecified</Genre>\n"; 436 content += " <SubGenre>Unspecified</SubGenre>\n"; 437 content += " <Task>route description</Task>\n"; 438 content += " <Modalities>Speech; Gestures</Modalities>\n"; 439 content += " <CommunicationContext>\n"; 440 content += " </CommunicationContext>\n"; 441 content += " <Content_Languages>\n"; 442 content += " </Content_Languages>\n"; 443 content += " 
<descriptions>\n"; 444 content += " </descriptions>\n"; 445 content += " </Content>\n"; 446 content += " <Actors>\n"; 447 content += " </Actors>\n"; 448 content += " </MDGroup>\n"; 449 content += " <Resources>\n"; 450 content += " </Resources>\n"; 451 content += " </Session>\n"; 452 content += " </Components>\n"; 453 content += "</CMD>\n"; 454 File cmdiFile = createCmdiFile("testSession", content); 455 CMDIDigester digester = new CMDIDigester(getIMDIFacetMap()); 456 CMDIData data = digester.process(cmdiFile); 457 assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId()); 458 List<String> resources = data.getResources(); 459 assertEquals(0, resources.size()); 460 SolrInputDocument doc = data.getSolrDocument(); 461 assertNotNull(doc); 462 assertEquals(5, doc.getFieldNames().size()); 463 assertEquals("kleve-route", doc.getFieldValue("name")); 464 assertEquals("Europe", doc.getFieldValue("continent")); 465 assertEquals("Netherlands", doc.getFieldValue("country")); 466 assertEquals("Unspecified", doc.getFieldValue("genre")); 467 assertEquals("Test.", doc.getFieldValue("description")); 468 assertEquals("Should be null not empty string", null, doc.getFieldValue("organisation")); 469 assertEquals(null, doc.getFieldValue("language")); 470 assertEquals(null, doc.getFieldValue("subject")); 471 } 472 473 @Test 474 public void testOlac() throws Exception { 475 String content = ""; 476 content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; 477 content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; 478 content += " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"; 479 content += " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\"\n"; 480 content += " xmlns:defns=\"http://www.openarchives.org/OAI/2.0/\"\n"; 481 content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438236/xsd\">\n"; 482 content += " <Header>\n"; 483 content += " 
<MdCreator>olac2cmdi.xsl</MdCreator>\n"; 484 content += " <MdCreationDate>2002-12-14</MdCreationDate>\n"; 485 content += " <MdSelfLink>oai:ailla.utexas.edu:1</MdSelfLink>\n"; 486 content += " <MdProfile>clarin.eu:cr1:p_1271859438236</MdProfile>\n"; 487 content += " </Header>\n"; 488 content += " <Resources>\n"; 489 content += " <ResourceProxyList/>\n"; 490 content += " <JournalFileProxyList/>\n"; 491 content += " <ResourceRelationList/>\n"; 492 content += " </Resources>\n"; 493 content += " <Components>\n"; 494 content += " <olac>\n"; 495 content += " <olac-creator>Joel Sherzer (recorder)</olac-creator>\n"; 496 content += " <olac-description>\n"; 497 content += " Channel: Talking;\n"; 498 content += " Genre: Traditional Narrative / Story;\n"; 499 content += " Country: Panama;\n"; 500 content += " Place of Recording: Mulatuppu;\n"; 501 content += " Event: Community Gathering;\n"; 502 content += " Institutional Affiliation: University of Texas at Austin;\n"; 503 content += " Participant Information: Political Leader;\n"; 504 content += " </olac-description>\n"; 505 content += " <olac-description>The one-eyed grandmother is one of many traditional Kuna stories performed in the Kuna gathering house. This story, performed here by Pedro Arias, combines European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more Kuna in origin. All are woven together and a moral is provided. 
Pedro Arias performed this story before a gathered audience in the morning..\n"; 506 content += " </olac-description>\n"; 507 content += " <olac-identifier>http://uts.cc.utexas.edu/~ailla/audio/sherzer/one_eyed_grandmother.ram</olac-identifier>\n"; 508 content += " <olac-identifier>http://uts.cc.utexas.edu/~ailla/texts/sherzer/one_eyed_grandmother.pdf</olac-identifier>\n"; 509 content += " <olac-language/>\n"; 510 content += " <olac-subject>Kuna</olac-subject>\n"; 511 content += " <type>Transcription</type>\n"; 512 content += " </olac>\n"; 513 content += " </Components>\n"; 514 content += "</CMD>\n"; 515 516 File cmdiFile = createCmdiFile("testOlac", content); 517 CMDIDigester digester = new CMDIDigester(getOlacFacetMap()); 518 CMDIData data = digester.process(cmdiFile); 519 assertEquals("oai:ailla.utexas.edu:1", data.getId()); 520 List<String> resources = data.getResources(); 521 assertEquals(0, resources.size()); 522 SolrInputDocument doc = data.getSolrDocument(); 523 assertNotNull(doc); 524 assertEquals(3, doc.getFieldNames().size()); 525 assertEquals(null, doc.getFieldValue("name")); 526 assertEquals(null, doc.getFieldValue("continent")); 527 assertEquals(null, doc.getFieldValue("language")); 528 assertEquals(null, doc.getFieldValue("country")); 529 assertEquals(null, doc.getFieldValue("organisation")); 530 assertEquals("Transcription", doc.getFieldValue("genre")); 531 assertEquals("Kuna", doc.getFieldValue("subject")); 532 assertEquals(2, doc.getFieldValues("description").size()); 533 } 534 535 @Test 536 public void testOlacCollection() throws Exception { 537 String content = ""; 538 content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; 539 content += "<CMD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; 540 content += " xsi:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438236/xsd\">\n"; 541 content += " <Header>\n"; 542 content += " 
<MdCreator>dir2cmdicollection.py</MdCreator>\n"; 543 content += " <MdCreationDate>2010-10-11</MdCreationDate>\n"; 544 content += " <MdSelfLink>collection_ATILF_Resources.cmdi</MdSelfLink>\n"; 545 content += " <MdProfile>clarin.eu:cr1:p_1271859438236</MdProfile>\n"; 546 content += " </Header>\n"; 547 content += " <Resources>\n"; 548 content += " <ResourceProxyList>\n"; 549 content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0001.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0001.xml.cmdi</ResourceRef></ResourceProxy>\n"; 550 content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0002.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0002.xml.cmdi</ResourceRef></ResourceProxy>\n"; 551 content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0003.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0003.xml.cmdi</ResourceRef></ResourceProxy>\n"; 552 content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0004.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0004.xml.cmdi</ResourceRef></ResourceProxy>\n"; 553 content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_a.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_a.xml.cmdi</ResourceRef></ResourceProxy>\n"; 554 content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0005_b.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0005_b.xml.cmdi</ResourceRef></ResourceProxy>\n"; 555 content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_0006.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_0006.xml.cmdi</ResourceRef></ResourceProxy>\n"; 556 content += "<ResourceProxy 
id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M277.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M277.xml.cmdi</ResourceRef></ResourceProxy>\n"; 557 content += "<ResourceProxy id=\"ATILF_Resources_0_oai_atilf_inalf_fr_M592.xml.cmdi\"><ResourceType>Metadata</ResourceType><ResourceRef>ATILF_Resources/0/oai_atilf_inalf_fr_M592.xml.cmdi</ResourceRef></ResourceProxy>\n"; 558 content += " </ResourceProxyList>\n"; 559 content += " <JournalFileProxyList/>\n"; 560 content += " <ResourceRelationList/>\n"; 561 content += " </Resources>\n"; 562 content += " <Components>\n"; 563 content += " <olac></olac>\n"; 564 content += " </Components>\n"; 565 content += "</CMD>\n"; 566 567 File cmdiFile = createCmdiFile("testOlac", content); 568 CMDIDigester digester = new CMDIDigester(getOlacFacetMap()); 569 CMDIData data = digester.process(cmdiFile); 570 assertEquals("collection_ATILF_Resources.cmdi", data.getId()); 571 List<String> resources = data.getResources(); 572 assertEquals(9, resources.size()); 573 SolrInputDocument doc = data.getSolrDocument(); 574 assertNull(doc); 575 576 } 577 578 private FacetMapping getOlacFacetMap() { 579 BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "importerConfig.xml" }); //TODO PD doesn't work on the command line. 580 FacetMapping facetMapping = (FacetMapping) factory.getBean("olacMapping"); 581 return facetMapping; 582 } 583 584 private FacetMapping getIMDIFacetMap() { 585 BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "importerConfig.xml" }); 586 FacetMapping facetMapping = (FacetMapping) factory.getBean("imdiMapping"); 587 return facetMapping; 588 } 589 383 590 private File createCmdiFile(String name, String content) throws IOException { 384 591 File file = File.createTempFile(name, "cmdi", testDir);
Note: See TracChangeset for help on using the changeset viewer.