Changeset 2373


Ignore:
Timestamp:
11/27/12 14:10:16 (12 years ago)
Author:
herste
Message:

some comments

Location:
vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIData.java

    r2368 r2373  
    99import java.util.Collection;
    1010import java.util.List;
     11
     12/**
     13 * Represents a document of cmdi data.
     14 */
    1115
    1216public class CMDIData {
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/DataRoot.java

    r1437 r2373  
    55public class DataRoot {
    66
     7    /**
     8     * The name of this dataroot. Some string. Try to keep it descriptive. E.g. "MPI CMDIfied IMDI archive", "LRT Inventory"
     9     */
    710    private String originName;
     11    /**
     12     * The directory under which to search for metadata files.
     13     */
    814    private File rootFile;
     15    /**
     16     * The location on some webserver where we can directly link to each metadata file found. E.g. http://localhost/cmdi/
     17     */
    918    private String prefix;
     19    /**
     20     * The bit of the full-path to the file to strip from the file name such that, when combined with the prefix string, one can link directly to the cmdi file.
     21     *
     22     * For example:
     23     * CMDI file found in /var/wwwroot/cmdi/1.cmdi
     24     * We want to convert this to: http://localhost/cmdi/1.cmdi
     25     * our "tostrip" and our "rootFile" can both be /var/wwwroot/cmdi
     26     * and our "prefix" can be like above.
     27     */
    1028    private String tostrip;
     29    /**
     30     * whether to delete all occurrences in the solr server from this originName
     31     */
    1132    private boolean deleteFirst = false;
     33
     34    // Getters and Setters below.
    1235
    1336    public String getPrefix() {
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/ImporterConfig.java

    r1230 r2373  
    33import java.util.List;
    44
     5/**
     6 * Represents the configuration of the metadata importer.
     7 */
     8
    59public class ImporterConfig {
    610
     11    /**
     12     * default location of config file.
     13     */
    714    public static final String CONFIG_FILE = "importerConfig.xml";
    815
     16    /**
     17     * whether to delete all.
     18     */
    919    private boolean deleteAllFirst = false;
     20
     21    /**
     22     * whether to print all xpath mappings found (to a file).
     23     */
    1024    private boolean printMapping = false;
    11    
     25
     26    /**
     27     * The list of all DataRoots (which define a directory to search for metadata files and some other things)
     28     */
    1229    private List<DataRoot> dataRoots;
     30
     31    // Getters and Setters below.
     32
    1333
    1434    public List<DataRoot> getDataRoots() {
    1535        return dataRoots;
    1636    }
    17    
     37
    1838    public void setDataRoots(List<DataRoot> dataRoots) {
    1939        this.dataRoots = dataRoots;
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java

    r1960 r2373  
    11package eu.clarin.cmdi.vlo.importer;
    22
    3 import java.io.File;
    4 import java.io.IOException;
    5 import java.net.MalformedURLException;
    6 import java.util.ArrayList;
    7 import java.util.Collection;
    8 import java.util.HashMap;
    9 import java.util.HashSet;
    10 import java.util.List;
    11 import java.util.Map;
    12 import java.util.Set;
    13 
     3import eu.clarin.cmdi.vlo.CommonUtils;
     4import eu.clarin.cmdi.vlo.Configuration;
     5import eu.clarin.cmdi.vlo.FacetConstants;
    146import org.apache.commons.io.FileUtils;
    157import org.apache.solr.client.solrj.SolrServerException;
     
    2214import org.springframework.context.support.ClassPathXmlApplicationContext;
    2315
    24 import eu.clarin.cmdi.vlo.CommonUtils;
    25 import eu.clarin.cmdi.vlo.Configuration;
    26 import eu.clarin.cmdi.vlo.FacetConstants;
    27 
     16import java.io.File;
     17import java.io.IOException;
     18import java.net.MalformedURLException;
     19import java.util.*;
     20
     21
     22/**
     23 * The main metadataImporter class. Also contains the main function.
     24 *
     25 * The metadataimporter reads all the config files and then, for each metadata file in each defined directory structure, parses and imports it as defined in the configuration.
     26 * The startImport function starts the importing and so on.
     27 */
    2828
    2929@SuppressWarnings({"serial"})
    3030public class MetadataImporter {
    3131
     32    /**
     33     * Defines which files to try and parse.
     34     * In this case all files ending in "xml" or "cmdi".
     35     */
    3236    private static final String[] VALID_CMDI_EXTENSIONS = new String[] { "xml", "cmdi" };
     37
     38    /**
     39     * The logger for this class.
     40     */
    3341    private final static Logger LOG = LoggerFactory.getLogger(MetadataImporter.class);
     42    /**
     43     * Some place to store errors.
     44     */
    3445    private static Throwable serverError;
     46    /**
     47     * the solr server.
     48     */
    3549    private StreamingUpdateSolrServer solrServer;
    3650
     51    /**
     52     * Defines the post-processor associations.
     53     * At import, for each facet value, this map is checked and all postprocessors associated with the facet _type_ are applied to the value before storing the new value in the solr document.
     54     */
    3755    final static Map<String, PostProcessor> POST_PROCESSORS = new HashMap<String, PostProcessor>();
    3856    static {
     
    4563    }
    4664
     65    /**
     66     * Contains MDSelflinks (usually).
     67     * Just to know what we have already done.
     68     */
    4769    private Set<String> processedIds = new HashSet<String>();
     70    /**
     71     * Some caching for solr documents (we are more efficient if we ram a whole bunch to the solr server at once).
     72     */
    4873    protected List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
     74    /**
     75     * Config.
     76     */
    4977    private final ImporterConfig config;
    5078
     79    // SOME STATS
    5180    private int nrOFDocumentsUpdated;
    5281    private int nrOfFilesAnalyzed = 0;
     
    5584    private int nrOfFilesWithError = 0;
    5685
     86    /**
     87     * Constructor, wants to know the config.
     88     * @param config the config.
     89     */
    5790    public MetadataImporter(ImporterConfig config) {
    5891        this.config = config;
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/PostProcessor.java

    r1039 r2373  
    11package eu.clarin.cmdi.vlo.importer;
     2
     3/**
     4 * Defines the interface for a postprocessor.
     5 *
     6 * Such a postprocessor is called on a single facet value after this facet value is inserted into the solr document during import.
     7 *
     8 * Which postprocessors (if any) are used for which facet is defined at the start of MetadataImporter.
     9 */
    210
    311public interface PostProcessor {
Note: See TracChangeset for help on using the changeset viewer.