Ignore:
Timestamp:
12/15/10 12:48:10 (13 years ago)
Author:
patdui
Message:
  • Added a VTD-XML parser, which is 10x faster than the DOM parser (5 minutes vs. 1 hour for 200,000 CMDI files). It still supports XPath for querying the data.
  • Made it possible to delete only part of the index: you can now delete a whole origin.
File:
1 moved

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo_webapp/src/test/java/eu/clarin/cmdi/vlo/importer/CMDIDataProcessorTest.java

    r995 r996  
    77import java.io.File;
    88import java.io.IOException;
     9import java.util.ArrayList;
     10import java.util.Collection;
     11import java.util.Collections;
    912import java.util.List;
    1013
     
    1720import org.springframework.context.support.ClassPathXmlApplicationContext;
    1821
    19 public class CMDIDigesterTest {
     22public class CMDIDataProcessorTest {
    2023
    2124    private static File testDir;
     25
     26    private CMDIDataProcessor getDataParser(FacetMapping map) {
     27        return new CMDIParserVTDXML(map);
     28        //        return new CMDIDigester(map);
     29    }
    2230
    2331    @Test
     
    6674        content += "</CMD>\n";
    6775        File cmdiFile = createCmdiFile("testCorpus", content);
    68         CMDIDigester digester = new CMDIDigester(getIMDIFacetMap());
    69         CMDIData data = digester.process(cmdiFile);
     76        CMDIDataProcessor processor = getDataParser(getIMDIFacetMap());
     77        CMDIData data = processor.process(cmdiFile);
    7078        assertEquals("test-hdl:1839/00-0000-0000-0000-0001-D", data.getId());
    7179        List<String> resources = data.getResources();
     
    362370        content += "</CMD>\n";
    363371        File cmdiFile = createCmdiFile("testSession", content);
    364         CMDIDigester digester = new CMDIDigester(getIMDIFacetMap());
    365         CMDIData data = digester.process(cmdiFile);
     372        CMDIDataProcessor processor = getDataParser(getIMDIFacetMap());
     373        CMDIData data = processor.process(cmdiFile);
    366374        assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId());
    367375        List<String> resources = data.getResources();
     
    452460        content += "</CMD>\n";
    453461        File cmdiFile = createCmdiFile("testSession", content);
    454         CMDIDigester digester = new CMDIDigester(getIMDIFacetMap());
    455         CMDIData data = digester.process(cmdiFile);
     462        CMDIDataProcessor processor = getDataParser(getIMDIFacetMap());
     463        CMDIData data = processor.process(cmdiFile);
    456464        assertEquals("test-hdl:1839/00-0000-0000-0009-294C-9", data.getId());
    457465        List<String> resources = data.getResources();
     
    505513        content += "         <description>The one-eyed grandmother is one of many traditional Kuna stories performed in the Kuna gathering house. This story, performed here by Pedro Arias, combines European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more Kuna in origin. All are woven together and a moral is provided. Pedro Arias performed this story before a gathered audience in the morning..\n";
    506514        content += "      </description>\n";
     515        content += "         <description>Test</description>\n";
    507516        content += "         <identifier>http://uts.cc.utexas.edu/~ailla/audio/sherzer/one_eyed_grandmother.ram</identifier>\n";
    508517        content += "         <identifier>http://uts.cc.utexas.edu/~ailla/texts/sherzer/one_eyed_grandmother.pdf</identifier>\n";
     
    516525
    517526        File cmdiFile = createCmdiFile("testOlac", content);
    518         CMDIDigester digester = new CMDIDigester(getOlacFacetMap());
    519         CMDIData data = digester.process(cmdiFile);
     527        CMDIDataProcessor processor = getDataParser(getOlacFacetMap());
     528        CMDIData data = processor.process(cmdiFile);
    520529        assertEquals("oai:ailla.utexas.edu:1", data.getId());
    521530        List<String> resources = data.getResources();
     
    532541        assertEquals("transcription", doc.getFieldValue("genre"));
    533542        //  assertEquals("Kuna", doc.getFieldValue("subject"));
    534         assertEquals(2, doc.getFieldValues("description").size());
     543        Collection<Object> fieldValues = doc.getFieldValues("description");
     544        assertEquals(3, fieldValues.size());
     545        List<String> descriptions = new ArrayList(fieldValues);
     546        Collections.sort(descriptions);
     547        assertEquals("\n    Channel: Talking;\n    Genre: Traditional Narrative / Story;\n    Country: Panama;\n"
     548                + "    Place of Recording: Mulatuppu;\n    Event: Community Gathering;\n"
     549                + "    Institutional Affiliation: University of Texas at Austin;\n    Participant Information: Political Leader;\n"
     550                + "      ", descriptions.get(0).toString());
     551        assertEquals("Test", descriptions.get(1).toString());
     552        assertEquals("The one-eyed grandmother is one of many traditional Kuna stories performed "
     553                + "in the Kuna gathering house. This story, performed here by Pedro Arias, combines "
     554                + "European derived motifs (Tom Thumb and Hansel and Gretel) with themes that seem more "
     555                + "Kuna in origin. All are woven together and a moral is provided. Pedro Arias performed "
     556                + "this story before a gathered audience in the morning..\n      ", descriptions.get(2).toString());
    535557    }
    536558
     
    568590
    569591        File cmdiFile = createCmdiFile("testOlac", content);
    570         CMDIDigester digester = new CMDIDigester(getOlacFacetMap());
    571         CMDIData data = digester.process(cmdiFile);
     592        CMDIDataProcessor processor = getDataParser(getOlacFacetMap());
     593        CMDIData data = processor.process(cmdiFile);
    572594        assertEquals("collection_ATILF_Resources.cmdi", data.getId());
    573595        List<String> resources = data.getResources();
     
    616638
    617639        File cmdiFile = createCmdiFile("testOlac", content);
    618         CMDIDigester digester = new CMDIDigester(getLrtFacetMap());
    619         CMDIData data = digester.process(cmdiFile);
     640        CMDIDataProcessor processor = getDataParser(getLrtFacetMap());
     641        CMDIData data = processor.process(cmdiFile);
    620642        assertEquals("clarin.eu:lrt:433", data.getId());
    621643        List<String> resources = data.getResources();
Note: See TracChangeset for help on using the changeset viewer.