Changeset 6356


Ignore:
Timestamp:
07/06/15 11:05:51 (9 years ago)
Author:
davor.ostojic@oeaw.ac.at
Message:

refers #768

default config path is set via system property: configFile

command line params are not processed by the script, just passed to java
there are two optional parameters:
-c <pathToConfigFile>
-l <pathToDataRoot> ... for processing only a subset of the data roots from the configuration file

checkDataRoots method from MetadataImporter won't exit if a dataRoot doesn't exist; instead it logs a warning and returns the list of existing data roots

new method filterDataRootsWithCLArgs:
if dataRoots are passed via the -l option, the importer will filter the list of existing data roots down to those given with -l

logging

Location:
vlo/trunk/vlo-importer/src/main
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo-importer/src/main/bin/vlo_solr_importer.sh

    r6235 r6356  
    1212fi
    1313
    14 # try to get the configuration file name from the command line options
    15 if [ 1 -eq $# ]; then
    16 CONFIG=$1
    17 else
    18 # fall back to default location
    19 echo No configuration location specified, using default
    20 CONFIG=${DIR}/"../config/VloConfig.xml"
    21 fi
     14#default configuration file
     15DFLT_CONFIG=${DIR}/"../config/VloConfig.xml"
    2216
    2317LOGDIR=${DIR}/../log/
     
    3529# to the JAVA command. Please note the a specification on the command line will
    3630# take preference over a specification as a property.
     31#
     32# to process only subset of data roots from configuration file pass the list via command line:
     33# -l path OR -l "path1 path2 ..."
     34#
    3735
    3836$JAVA -Xmx3G \
    3937    -cp "${DIR}:${DIR}/vlo-importer-${project.version}-importer.jar" \
     38        -DconfigFile=${DFLT_CONFIG} \
    4039    -DIMPORTER_LOG_DIR=${LOGDIR} \
    41     eu.clarin.cmdi.vlo.importer.MetadataImporter -c "$CONFIG"
     40    eu.clarin.cmdi.vlo.importer.MetadataImporter "$@"
    4241
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java

    r6319 r6356  
    11package eu.clarin.cmdi.vlo.importer;
    2 
    3 import eu.clarin.cmdi.vlo.LanguageCodeUtils;
    4 import eu.clarin.cmdi.vlo.CommonUtils;
    5 import eu.clarin.cmdi.vlo.FacetConstants;
    6 import eu.clarin.cmdi.vlo.config.DataRoot;
    7 import eu.clarin.cmdi.vlo.config.VloConfig;
    8 import eu.clarin.cmdi.vlo.config.XmlVloConfigFactory;
    92
    103import java.io.File;
     
    147import java.text.SimpleDateFormat;
    158import java.util.ArrayList;
     9import java.util.Arrays;
    1610import java.util.Date;
    1711import java.util.HashMap;
    1812import java.util.HashSet;
    1913import java.util.Iterator;
     14import java.util.LinkedList;
    2015import java.util.List;
    2116import java.util.Map;
     
    3833import org.slf4j.LoggerFactory;
    3934
     35import eu.clarin.cmdi.vlo.CommonUtils;
     36import eu.clarin.cmdi.vlo.FacetConstants;
     37import eu.clarin.cmdi.vlo.LanguageCodeUtils;
     38import eu.clarin.cmdi.vlo.config.DataRoot;
     39import eu.clarin.cmdi.vlo.config.VloConfig;
     40import eu.clarin.cmdi.vlo.config.XmlVloConfigFactory;
     41
    4042/**
    4143 * The main metadataImporter class. Also contains the main function.
     
    9395     */
    9496    public MetadataImporter() {}
     97   
     98    public MetadataImporter(String clDatarootsList) {
     99        this.clDatarootsList = clDatarootsList;
     100    }
    95101
    96102    /**
     
    121127        initSolrServer();
    122128        List<DataRoot> dataRoots = checkDataRoots();
     129       
     130        dataRoots = filterDataRootsWithCLArgs(dataRoots);
     131       
    123132        long start = System.currentTimeMillis();
    124133        try {
     
    202211    protected List<DataRoot> checkDataRoots() {
    203212        List<DataRoot> dataRoots = config.getDataRoots();
     213        List<DataRoot> existingDataRoots = new LinkedList<DataRoot>();
    204214        for (DataRoot dataRoot : dataRoots) {
    205215            if (!dataRoot.getRootFile().exists()) {
    206                 LOG.error("Root file " + dataRoot.getRootFile() + " does not exist. Probable configuration error so stopping import.");
    207                 System.exit(1);
    208             }
    209         }
    210         return dataRoots;
     216                LOG.warn("Root file " + dataRoot.getRootFile() + " does not exist. It could be configuration error! Proceeding with next ...");
     217            } else{
     218                existingDataRoots.add(dataRoot);
     219            }
     220           
     221        }
     222        return existingDataRoots;
     223    }
     224   
     225    /**
     226     * if user specified which data roots should be imported,
     227     * list of existing data roots will be filtered with the list from user
     228     *
     229     * @return
     230     */
     231    protected List<DataRoot> filterDataRootsWithCLArgs(List<DataRoot> dataRoots){
     232        if(clDatarootsList == null)
     233                return dataRoots;
     234       
     235       
     236        LOG.info("Filtering configured data root files with command line arguments: \"" + clDatarootsList + "\"" ) ;
     237       
     238        LinkedList<File> fsDataRoots = new LinkedList<File>();
     239       
     240        List<String> paths = Arrays.asList((clDatarootsList.split("\\s+")));
     241       
     242        //Convert String paths to File objects for comparison
     243        for(String path: paths)
     244                fsDataRoots.add(new File(path));
     245       
     246        List<DataRoot> filteredDataRoots = new LinkedList<DataRoot>();
     247        try{
     248                //filter data
     249        dr: for(DataRoot dataRoot: dataRoots){
     250                        for(File fsDataRoot: fsDataRoots){
     251                        if(fsDataRoot.getCanonicalPath().equals(dataRoot.getRootFile().getCanonicalPath())){
     252                                filteredDataRoots.add(dataRoot);
     253                                fsDataRoots.remove(fsDataRoot);
     254                                continue dr;
     255                        }
     256                        }
     257                        LOG.info("Root file " + dataRoot.getRootFile() + " will be omitted from processing");
     258                }
     259        }catch (IOException e){
     260                filteredDataRoots = dataRoots;
     261        }
     262
     263       
     264                return filteredDataRoots;
    211265    }
    212266
     
    529583    public static LanguageCodeUtils languageCodeUtils;
    530584   
     585    //data roots passed from command line   
     586    private String clDatarootsList = null;
     587   
    531588    /**
    532589     * @param args
     
    541598        // use the Apache cli framework for getting command line parameters
    542599        Options options = new Options();
     600       
     601        // Data root list passed from command line with -l option
     602        String cldrList = null;
    543603
    544604        /**
    545605         * Add a "c" option, the option indicating the specification of an XML
    546606         * configuration file
     607         *
     608         * "l" option - to specify which data roots (from config file) to import
     609         * imports all by default
    547610         */
    548611        options.addOption("c", true, "-c <file> : use parameters specified in <file>");
     612        options.addOption("l", true, "-l <dataroot> [ ' ' <dataroot> ]* :  space separated list of dataroots to be processed.\n"
     613                        + "If dataroot is not specified in config file it will be ignored.");
     614        options.getOption("l").setOptionalArg(true);
    549615       
    550616        CommandLineParser parser = new PosixParser();
     
    557623                // the "c" option was specified, now get its value
    558624                configFile = cmd.getOptionValue("c");
     625            }
     626           
     627            if(cmd.hasOption("l")){
     628                cldrList = cmd.getOptionValue("l");
    559629            }
    560630           
     
    599669            }
    600670            System.out.println("Reading configuration from " + configUrl.toString());
     671            LOG.info("Reading configuration from " + configUrl.toString());
    601672            final XmlVloConfigFactory configFactory = new XmlVloConfigFactory(configUrl);
    602673            MetadataImporter.config = configFactory.newConfig();
     
    605676            // optionally, modify the configuration here
    606677            // create and start the importer
    607             MetadataImporter importer = new MetadataImporter();
     678            MetadataImporter importer = new MetadataImporter(cldrList);
    608679            importer.startImport();
    609680
Note: See TracChangeset for help on using the changeset viewer.