Changeset 1758


Ignore:
Timestamp:
02/21/12 12:04:08 (12 years ago)
Author:
teckart
Message:

Added NationalProjectPostProcessor, modified importer, added nationalProject-mapping file to resources; for ticket #167

Location:
vlo/trunk/vlo_webapp/src
Files:
2 added
4 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/FacetConstants.java

    r1546 r1758  
    1919    public static final String FIELD_PROJECT_NAME = "projectName";
    2020    public static final String FIELD_COMPLETE_METADATA = "completeMD";
     21    public static final String FIELD_NATIONAL_PROJECT = "nationalProject";
    2122
    2223    //The _ facets are not meant to be shown to users.
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java

    r1546 r1758  
    2525import eu.clarin.cmdi.vlo.Configuration;
    2626import eu.clarin.cmdi.vlo.FacetConstants;
    27 import java.io.BufferedReader;
    28 import java.io.FileReader;
    29 
    30 @SuppressWarnings("serial")
     27
     28
     29@SuppressWarnings({"serial"})
    3130public class MetadataImporter {
    3231
     
    4241        POST_PROCESSORS.put(FacetConstants.FIELD_RESOURCE_TYPE, new ResourceTypePostProcessor());
    4342        POST_PROCESSORS.put(FacetConstants.FIELD_LANGUAGE_LINK, new LanguageLinkPostProcessor());
     43        POST_PROCESSORS.put(FacetConstants.FIELD_NATIONAL_PROJECT, new NationalProjectPostProcessor());
    4444    }
    4545
     
    5858    }
    5959
     60    /**
     61     * Retrieves all files with VALID_CMDI_EXTENSIONS from all DataRoot entries and starts processing for every single file
     62     * @throws MalformedURLException
     63     */
    6064    void startImport() throws MalformedURLException {
    6165        initSolrServer();
     
    6367        long start = System.currentTimeMillis();
    6468        try {
     69                // Delete the whole Solr db
    6570            if (config.isDeleteAllFirst()) {
    6671                LOG.info("Deleting original data...");
    67                 solrServer.deleteByQuery("*:*");//Delete the whole solr db.
     72                solrServer.deleteByQuery("*:*"); 
    6873                LOG.info("Deleting original data done.");
    6974            }
     
    108113    }
    109114
     115    /**
     116     * Check a List of DataRoots for existence of RootFile (typically parent directory of metadata files)
     117     * @return
     118     */
    110119    private List<DataRoot> checkDataRoots() {
    111120        List<DataRoot> dataRoots = config.getDataRoots();
     
    120129
    121130    /**
    122      *
     131     * Get the rootFile or all files with VALID_CMDI_EXTENSIONS if rootFile is a directory
    123132     * @param rootFile
    124      * @return The rootFile if it is a file or when it is a directory the files in that directory
     133     * @return List with the rootFile or all contained files if rootFile is a directory
    125134     */
    126135    private List<File> getFilesFromDataRoot(File rootFile) {
     
    129138            result.add(rootFile);
    130139        } else {
    131             Collection listFiles = FileUtils.listFiles(rootFile, VALID_CMDI_EXTENSIONS, true);
     140                        Collection<File> listFiles = FileUtils.listFiles(rootFile, VALID_CMDI_EXTENSIONS, true);
    132141            result.addAll(listFiles);
    133142        }
     
    135144    }
    136145
     146    /**
     147     * Initialize SolrServer as specified in configuration file
     148     * @throws MalformedURLException
     149     */
    137150    protected void initSolrServer() throws MalformedURLException {
    138151        String solrUrl = Configuration.getInstance().getSolrUrl();
     
    147160    }
    148161
     162    /**
     163     * Process single CMDI file with CMDIDataProcessor
     164     * @param file CMDI input file
     165     * @param dataOrigin
     166     * @param processor
     167     * @throws SolrServerException
     168     * @throws IOException
     169     */
    149170    private void processCmdi(File file, DataRoot dataOrigin, CMDIDataProcessor processor) throws SolrServerException, IOException {
    150171        nrOfFilesAnalyzed++;
     
    177198    }
    178199
     200    /**
     201     * Check id for validity
     202     * @param id
     203     * @return true if id is acceptable, false otherwise
     204     */
    179205    private boolean idOk(String id) {
    180206        return id != null && !id.isEmpty();
    181207    }
    182208
     209    /**
     210     * Adds some additional information from DataRoot to solrDocument, adds solrDocument to document list, submits list to SolrServer every 1000 files
     211     * @param solrDocument
     212     * @param cmdiData
     213     * @param file
     214     * @param dataOrigin
     215     * @throws SolrServerException
     216     * @throws IOException
     217     */
    183218    private void updateDocument(SolrInputDocument solrDocument, CMDIData cmdiData, File file, DataRoot dataOrigin) throws SolrServerException,
    184219            IOException {
     
    197232
    198233        solrDocument.addField(FacetConstants.FIELD_COMPLETE_METADATA, completeMDUrl); // TODO: add the contents of the metadata file here
    199 
     234       
    200235        addResourceData(solrDocument, cmdiData);
    201236        docs.add(solrDocument);
     
    211246     */
    212247    private void addResourceData(SolrInputDocument solrDocument, CMDIData cmdiData) {
    213         List<Object> fieldValues = solrDocument.containsKey(FacetConstants.FIELD_RESOURCE_TYPE) ? new ArrayList(solrDocument
     248        List<Object> fieldValues = solrDocument.containsKey(FacetConstants.FIELD_RESOURCE_TYPE) ? new ArrayList<Object>(solrDocument
    214249                .getFieldValues(FacetConstants.FIELD_RESOURCE_TYPE)) : null;
    215250        solrDocument.removeField(FacetConstants.FIELD_RESOURCE_TYPE); //Remove old values they might be overwritten.
     
    236271    }
    237272
     273    /**
     274     * Sends the current list of SolrInputDocuments to the SolrServer and clears the list afterwards
     275     * @throws SolrServerException
     276     * @throws IOException
     277     */
    238278    protected void sendDocs() throws SolrServerException, IOException {
    239279        LOG.info("Sending " + docs.size() + " docs to solr server. Total number of docs updated till now: " + nrOFDocumentsUpdated);
  • vlo/trunk/vlo_webapp/src/main/resources/facetConcepts.xml

    r1546 r1758  
    6767    <pattern>/c:CMD/c:Components/c:OLAC-DcmiTerms/c:type[@dcterms-type="DCMIType"]/text()</pattern>
    6868  </facetConcept>
     69  <facetConcept name="nationalProject">
     70    <pattern>/c:CMD/c:Header/c:MdCollectionDisplayName/text()</pattern>
     71  </facetConcept>
    6972</facetConcepts>
  • vlo/trunk/vlo_webapp/src/test/java/eu/clarin/cmdi/vlo/importer/FacetMappingFactoryTest.java

    r1580 r1758  
    2222                .getFacetMapping("http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1271859438204/xsd");//IMDI Session profile xsd
    2323        List<FacetConfiguration> facets = facetMapping.getFacets();
    24         assertEquals(14, facets.size());
     24        assertEquals(15, facets.size());
    2525        int index = 0;
    2626        FacetConfiguration mapping = facets.get(index++);
     
    8585        assertEquals(FacetConstants.FIELD_RESOURCE_TYPE, mapping.getName());
    8686        assertEquals(1, mapping.getPatterns().size());
     87        mapping = facets.get(index++);
     88        assertEquals("/c:CMD/c:Header/c:MdCollectionDisplayName/text()", mapping.getPatterns().get(0));
     89        assertEquals(1, mapping.getPatterns().size());
    8790        assertEquals("check to see we tested them all", facets.size(), index);
    8891    }
     
    9396                .getFacetMapping("http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1288172614026/xsd");
    9497        List<FacetConfiguration> facets = facetMapping.getFacets();
    95         assertEquals(11, facets.size());
     98        assertEquals(12, facets.size());
    9699        int index = 0;
    97100        FacetConfiguration mapping = facets.get(index++);
     
    140143        assertEquals(FacetConstants.FIELD_RESOURCE_TYPE, mapping.getName());
    141144        assertEquals(2, mapping.getPatterns().size());
     145        mapping = facets.get(index++);
     146        assertEquals("/c:CMD/c:Header/c:MdCollectionDisplayName/text()", mapping.getPatterns().get(0));
     147        assertEquals(1, mapping.getPatterns().size());
    142148        assertEquals("check to see we tested them all", facets.size(), index);
    143149    }
     
    148154                .getFacetMapping("http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1289827960126/xsd");
    149155        List<FacetConfiguration> facets = facetMapping.getFacets();
    150         assertEquals(12, facets.size());
     156        assertEquals(13, facets.size());
    151157        int index = 0;
    152158        FacetConfiguration mapping = facets.get(index++);
     
    202208        assertEquals(2, mapping.getPatterns().size());
    203209        assertEquals("/c:CMD/c:Components/c:LrtInventoryResource/c:LrtCommon/c:ResourceType/text()", mapping.getPatterns().get(0));
     210        mapping = facets.get(index++);
     211        assertEquals("/c:CMD/c:Header/c:MdCollectionDisplayName/text()", mapping.getPatterns().get(0));
     212        assertEquals(1, mapping.getPatterns().size());
    204213        assertEquals("check to see we tested them all", facets.size(), index);
    205214    }
Note: See TracChangeset for help on using the changeset viewer.