Changeset 995


Ignore:
Timestamp:
12/15/10 09:23:33 (13 years ago)
Author:
patdui
Message:
  • added test for lrt mapping
Location:
vlo/trunk/vlo_webapp/src
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIDigester.java

    r992 r995  
    2525    private final static Logger LOG = LoggerFactory.getLogger(CMDIDigester.class);
    2626    private final FacetMapping facetMapping;
    27     //    private XMLReader xmlReader;
    2827    private DocumentBuilder builder;
    2928
     
    3736            throw new RuntimeException("Cannot instantiate documentBuilder:", e);
    3837        }
    39         //        try {
    40         //            xmlReader = createXmlReader();
    41         //        } catch (SAXException e) {
    42         //            throw new RuntimeException("Cannot instantiate xmlReader:", e);
    43         //        }
    4438    }
    4539
     
    5044        XPath xpath = XPathFactory.newInstance().newXPath();
    5145        result = createCMDIData(xpath, inputSource);
    52 
    53         /**
    54          * Do not reuse the digester it holds state on bad parses. We can reuse the xmlReader. Creating a new Digester or reusing an
    55          * instance gives similar performance.
    56          * @see org.apache.commons.digester.Digester
    57          */
    58         //result = (CMDIData) createDigester().parse(inputSource);
    5946        return result;
    6047    }
     
    9279        } // else do nothing: it is perfectly acceptable that not all data is in a CMDI file, so not everything will be matched. E.g. XPath expressions written for CMDI session files will never match on CMD corpus files.
    9380    }
    94 
    95     //    private Digester createDigester() {
    96     //        Digester digester = new Digester(xmlReader);
    97     //        digester.setValidating(false);
    98     //        digester.addObjectCreate("CMD", CMDIData.class);
    99     //        digester.addBeanPropertySetter(facetMapping.getIdMapping(), "id");
    100     //        digester.addCallMethod("CMD/Resources/ResourceProxyList/ResourceProxy/", "addResource", 2);
    101     //        digester.addCallParam("CMD/Resources/ResourceProxyList/ResourceProxy/ResourceRef", 0);
    102     //        digester.addCallParam("CMD/Resources/ResourceProxyList/ResourceProxy/ResourceType", 1);
    103     //        //        Map<String, String> facetMap = facetMapping.getFacetMap();
    104     //        //        for (String facet : facetMap.keySet()) {
    105     //        //            matchDocumentField(digester, facetMap.get(facet), facet);
    106     //        //        }
    107     //        return digester;
    108     //    }
    109     //
    110     //    private void matchDocumentField(Digester digester, String pattern, String fieldName) {
    111     //        String[] split = pattern.split(",@", 2);
    112     //        String path = split[0];
    113     //        String attribute = split.length == 2 ? split[1] : null;
    114     //        digester.addCallMethod(path, "addDocField", 2);
    115     //        digester.addObjectParam(path, 0, fieldName);
    116     //        digester.addCallParam(path, 1, attribute);
    117     //    }
    118     //
    119     //    private XMLReader createXmlReader() throws SAXException {
    120     //        XMLReader xmlReader = XMLReaderFactory.createXMLReader();
    121     //        xmlReader.setFeature("http://xml.org/sax/features/validation", true);
    122     //        xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);
    123     //        xmlReader.setProperty("http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema");
    124     //        return xmlReader;
    125     //    }
    126 
    12781}
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/FacetConfiguration.java

    r992 r995  
    3030        return name;
    3131    }
     32   
     33    @Override
     34    public String toString() {
     35        return "name="+name+", pattern="+pattern;
     36    }
    3237}
  • vlo/trunk/vlo_webapp/src/main/resources/importerConfig.xml

    r994 r995  
    136136
    137137  <bean id="imdiMapping" class="eu.clarin.cmdi.vlo.importer.FacetMapping"> <!-- add year? -->
    138     <property name="idMapping" value="CMD/Header/MdSelfLink/text()" />
     138    <property name="idMapping" value="/CMD/Header/MdSelfLink/text()" />
    139139    <property name="facets">
    140140      <list>
    141141        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    142142          <property name="name" value="name" />
    143           <property name="pattern" value="CMD/Components/Session/Name/text()" />
     143          <property name="pattern" value="/CMD/Components/Session/Name/text()" />
    144144        </bean>
    145145        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    146146          <property name="name" value="year" />
    147           <property name="pattern" value="CMD/Components/Session/Date/text()" />
     147          <property name="pattern" value="/CMD/Components/Session/Date/text()" />
    148148        </bean>
    149149        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    150150          <property name="name" value="continent" />
    151           <property name="pattern" value="CMD/Components/Session/MDGroup/Location/Continent/text()" />
     151          <property name="pattern" value="/CMD/Components/Session/MDGroup/Location/Continent/text()" />
    152152        </bean>
    153153        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    154154          <property name="name" value="country" />
    155           <property name="pattern" value="CMD/Components/Session/MDGroup/Location/Country/text()" />
     155          <property name="pattern" value="/CMD/Components/Session/MDGroup/Location/Country/text()" />
    156156        </bean>
    157157        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    158158          <property name="name" value="language" />
    159           <property name="pattern" value="CMD/Components/Session/MDGroup/Content/Content_Languages/Content_Language/Id/text()" />
     159          <property name="pattern" value="/CMD/Components/Session/MDGroup/Content/Content_Languages/Content_Language/Id/text()" />
    160160        </bean>
    161161        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    162162          <property name="name" value="organisation" />
    163           <property name="pattern" value="CMD/Components/Session/MDGroup/Project/Contact/Organisation/text()" />
     163          <property name="pattern" value="/CMD/Components/Session/MDGroup/Project/Contact/Organisation/text()" />
    164164        </bean>
    165165        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    166166          <property name="name" value="genre" />
    167           <property name="pattern" value="CMD/Components/Session/MDGroup/Content/Genre/text()" />
     167          <property name="pattern" value="/CMD/Components/Session/MDGroup/Content/Genre/text()" />
    168168          <property name="caseInsensitive" value="true" />
    169169        </bean>
    170170        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    171171          <property name="name" value="subject" />
    172           <property name="pattern" value="CMD/Components/Session/MDGroup/Content/Subject/text()" />
     172          <property name="pattern" value="/CMD/Components/Session/MDGroup/Content/Subject/text()" />
    173173          <property name="caseInsensitive" value="true" />
    174174        </bean>
    175175        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    176176          <property name="name" value="description" />
    177           <property name="pattern" value="CMD/Components/Session/descriptions/Description/text()" />
     177          <property name="pattern" value="/CMD/Components/Session/descriptions/Description/text()" />
    178178        </bean>
    179179      </list>
     
    187187
    188188  <bean id="olacMapping" class="eu.clarin.cmdi.vlo.importer.FacetMapping">
    189     <property name="idMapping" value="CMD/Header/MdSelfLink/text()" /> <!-- And some other example see http://trac.clarin.eu/wiki/CmdiVirtualLanguageObservatory -->
     189    <property name="idMapping" value="/CMD/Header/MdSelfLink/text()" /> <!-- And some other example see http://trac.clarin.eu/wiki/CmdiVirtualLanguageObservatory -->
    190190    <property name="facets">
    191191      <list>
    192192        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    193193          <property name="name" value="name" />
    194           <property name="pattern" value="CMD/Components/OLAC-DcmiTerms/title/text()" />
     194          <property name="pattern" value="/CMD/Components/OLAC-DcmiTerms/title/text()" />
    195195        </bean>
    196196        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    197197          <property name="name" value="country" />
    198           <property name="pattern" value="CMD/Components/OLAC-DcmiTerms/spatial/text()" />
     198          <property name="pattern" value="/CMD/Components/OLAC-DcmiTerms/spatial/text()" />
    199199          <!--
    200200            /CMD/Components/OLAC-DcmiTerms/spatial[@dcterms-type="ISO3166"] -> country
     
    204204        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    205205          <property name="name" value="language" />
    206           <property name="pattern" value="CMD/Components/OLAC-DcmiTerms/language/@olac-language" />
     206          <property name="pattern" value="/CMD/Components/OLAC-DcmiTerms/language/@olac-language" />
    207207        </bean>
    208208        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    209209          <property name="name" value="organisation" />
    210           <property name="pattern" value="CMD/Components/OLAC-DcmiTerms/publisher/text()" />
     210          <property name="pattern" value="/CMD/Components/OLAC-DcmiTerms/publisher/text()" />
    211211        </bean>
    212212        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    213213          <property name="name" value="genre" />
    214           <property name="pattern" value="CMD/Components/OLAC-DcmiTerms/type/@olac-linguistic-type" />
     214          <property name="pattern" value="/CMD/Components/OLAC-DcmiTerms/type/@olac-linguistic-type" />
    215215          <property name="caseInsensitive" value="true" />
    216216        </bean>
    217217        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    218218          <property name="name" value="description" />
    219           <property name="pattern" value="CMD/Components/OLAC-DcmiTerms/description/text()" />
     219          <property name="pattern" value="/CMD/Components/OLAC-DcmiTerms/description/text()" />
    220220        </bean>
    221221        <!--
     
    244244
    245245  <bean id="lrtMapping" class="eu.clarin.cmdi.vlo.importer.FacetMapping">
    246     <property name="idMapping" value="CMD/Header/MdSelfLink/text()" />
     246    <property name="idMapping" value="/CMD/Header/MdSelfLink/text()" />
    247247    <property name="facets">
    248248      <list>
    249249        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    250250          <property name="name" value="name" />
    251           <property name="pattern" value="CMD/Components/LrtInventoryResource/LrtCommon/ResourceName/text()" />
     251          <property name="pattern" value="/CMD/Components/LrtInventoryResource/LrtCommon/ResourceName/text()" />
    252252        </bean>
    253253        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    254254          <property name="name" value="country" />
    255           <property name="pattern" value="CMD/Components/LrtInventoryResource/LrtCommon/Countries/Country/code/text()" />
     255          <property name="pattern" value="/CMD/Components/LrtInventoryResource/LrtCommon/Countries/Country/Code/text()" />
    256256        </bean>
    257257        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    258258          <property name="name" value="language" />
    259           <property name="pattern" value="CMD/Components/LrtInventoryResource/LrtCommon/Languages/ISO639/iso-639-3-code/text()" />
     259          <property name="pattern" value="/CMD/Components/LrtInventoryResource/LrtCommon/Languages/ISO639/iso-639-3-code/text()" />
    260260        </bean>
    261261        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    262262          <property name="name" value="organisation" />
    263           <property name="pattern" value="CMD/Components/LrtInventoryResource/LrtCommon/Institute/text()" />
     263          <property name="pattern" value="/CMD/Components/LrtInventoryResource/LrtCommon/Institute/text()" />
    264264        </bean>
    265265        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    266266          <property name="name" value="year" />
    267267          <property name="pattern"
    268             value="CMD/Components/LrtInventoryResource/LrtCommon/FinalizationYearResourceCreation/text()" />
     268            value="/CMD/Components/LrtInventoryResource/LrtCommon/FinalizationYearResourceCreation/text()" />
    269269        </bean>
    270270        <bean class="eu.clarin.cmdi.vlo.importer.FacetConfiguration">
    271271          <property name="name" value="description" />
    272           <property name="pattern" value="CMD/Components/LrtInventoryResource/LrtCommon/Description/text()" />
     272          <property name="pattern" value="/CMD/Components/LrtInventoryResource/LrtCommon/Description/text()" />
    273273        </bean>
    274274        <!--        <entry key="continent">-->
  • vlo/trunk/vlo_webapp/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDigesterTest.java

    r989 r995  
    379379                "This  recording was made to generate a freely available test resource including speech and gestures. The annotations were created by Peter and Kita who is gesture researcher at the MPI for Psycholinguistics.",
    380380                doc.getFieldValue("description"));
    381         assertEquals("2002-10-30", doc.getFieldValue("year")); //TODO PD curate year needs to be only the year also for olac, lrt
     381        assertEquals("2002-10-30", doc.getFieldValue("year"));
    382382        assertEquals(null, doc.getFieldValue("subject"));
    383 //TODO PD make it work check TRAC ticket to get resources in
    384 //        Collection<Object> fieldValue = doc.getFieldValues("resource");
    385 //        assertEquals(2, fieldValue.size());
    386383    }
    387384
     
    534531        assertEquals(null, doc.getFieldValue("organisation"));
    535532        assertEquals("transcription", doc.getFieldValue("genre"));
    536       //  assertEquals("Kuna", doc.getFieldValue("subject"));
     533        //  assertEquals("Kuna", doc.getFieldValue("subject"));
    537534        assertEquals(2, doc.getFieldValues("description").size());
    538535    }
     
    581578    }
    582579
    583     //TODO PD add LRT test
    584    
     580    @Test
     581    public void testLrtCollection() throws Exception {
     582        String content = "";
     583        content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
     584        content += "<CMD ns0:schemaLocation=\"http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1289827960126/xsd\" xmlns:ns0=\"http://www.w3.org/2001/XMLSchema-instance\">\n";
     585        content += "    <Header>\n";
     586        content += "        <MdCreator>lrt2cmdi.py</MdCreator>\n";
     587        content += "        <MdCreationDate>2010-11-17</MdCreationDate>\n";
     588        content += "        <MdSelfLink>clarin.eu:lrt:433</MdSelfLink>\n";
     589        content += "        <MdProfile>clarin.eu:cr1:p_1289827960126</MdProfile>\n";
     590        content += "    </Header>\n";
     591        content += "    <Resources>\n";
     592        content += "        <ResourceProxyList />\n";
     593        content += "        <JournalFileProxyList />\n";
     594        content += "        <ResourceRelationList />\n";
     595        content += "    </Resources>\n";
     596        content += "    <Components>\n";
     597        content += "        <LrtInventoryResource>\n";
     598        content += "            <LrtCommon>\n";
     599        content += "                <ResourceName>Corpus of Present-day Written Estonian</ResourceName>\n";
     600        content += "                <ResourceType>Written Corpus</ResourceType>\n";
     601        content += "                <LanguagesOther />\n";
     602        content += "                <Description>written general; 95 mio words; TEI/SGML</Description>\n";
     603        content += "                <ContactPerson>Kadri.Muischnek@ut.ee</ContactPerson>\n";
     604        content += "                <Format />\n";
     605        content += "                <Institute>Test</Institute>\n";
     606        content += "                <MetadataLink />\n";
     607        content += "                <Publications />\n";
     608        content += "                <ReadilyAvailable>true</ReadilyAvailable>\n";
     609        content += "                <ReferenceLink />         \n";
     610        content += "                <Languages><ISO639><iso-639-3-code>est</iso-639-3-code></ISO639></Languages>\n";
     611        content += "                <Countries><Country><Code>EE</Code></Country></Countries>\n";
     612        content += "            </LrtCommon>\n";
     613        content += "       </LrtInventoryResource>\n";
     614        content += "    </Components>\n";
     615        content += "</CMD>\n";
     616
     617        File cmdiFile = createCmdiFile("testOlac", content);
     618        CMDIDigester digester = new CMDIDigester(getLrtFacetMap());
     619        CMDIData data = digester.process(cmdiFile);
     620        assertEquals("clarin.eu:lrt:433", data.getId());
     621        List<String> resources = data.getResources();
     622        assertEquals(0, resources.size());
     623        SolrInputDocument doc = data.getSolrDocument();
     624        assertNotNull(doc);
     625        assertEquals(5, doc.getFieldNames().size());
     626        assertEquals("Corpus of Present-day Written Estonian", doc.getFieldValue("name"));
     627        assertEquals(null, doc.getFieldValue("continent"));
     628        assertEquals(1, doc.getFieldValues("language").size());
     629        assertEquals("est", doc.getFieldValue("language"));
     630        assertEquals("EE", doc.getFieldValue("country"));
     631        assertEquals("Test", doc.getFieldValue("organisation"));
     632        assertEquals(null, doc.getFieldValue("year"));
     633        assertEquals(null, doc.getFieldValue("genre"));
     634        assertEquals("written general; 95 mio words; TEI/SGML", doc.getFieldValue("description"));
     635    }
     636
    585637    private FacetMapping getOlacFacetMap() {
    586         BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "importerConfig.xml" }); 
     638        BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "importerConfig.xml" });
    587639        FacetMapping facetMapping = (FacetMapping) factory.getBean("olacMapping");
    588640        return facetMapping;
     
    592644        BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "importerConfig.xml" });
    593645        FacetMapping facetMapping = (FacetMapping) factory.getBean("imdiMapping");
     646        return facetMapping;
     647    }
     648
     649    private FacetMapping getLrtFacetMap() {
     650        BeanFactory factory = new ClassPathXmlApplicationContext(new String[] { "importerConfig.xml" });
     651        FacetMapping facetMapping = (FacetMapping) factory.getBean("lrtMapping");
    594652        return facetMapping;
    595653    }
Note: See TracChangeset for help on using the changeset viewer.