Changeset 5228


Ignore:
Timestamp:
05/19/14 11:38:48 (10 years ago)
Author:
teckart@informatik.uni-leipzig.de
Message:

Improved documentation and log statements

File:
1 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIParserVTDXML.java

    r5008 r5228  
    2626
    2727    @Override
    28         public CMDIData process(File file) throws VTDException, IOException {
    29         CMDIData result = new CMDIData();
     28    public CMDIData process(File file) throws VTDException, IOException {
     29        CMDIData cmdiData = new CMDIData();
    3030        VTDGen vg = new VTDGen();
    3131        FileInputStream fileInputStream = new FileInputStream(file);
    3232        vg.setDoc(IOUtils.toByteArray(fileInputStream));
    3333        vg.parse(true);
    34         fileInputStream.close();;
     34        fileInputStream.close();
    3535       
    3636        VTDNav nav = vg.getNav();
    37         setNameSpace(nav);//setting namespace once, all other instance of AutoPilot keep the setting (a bit tricky).
     37        setNameSpace(nav); //setting namespace once, all other instances of AutoPilot keep the setting (a bit tricky).
    3838        FacetMapping facetMapping = getFacetMapping(nav.cloneNav(), file.getAbsolutePath());
    39         /** New nice error log to find erroneous files */
    40         if(facetMapping.getFacets().size() == 0){
    41             LOG.error("Problems mapping facets for file: " + file.getAbsolutePath());
     39
     40        if(facetMapping.getFacets().isEmpty()){
     41            LOG.error("Problems mapping facets for file: {}", file.getAbsolutePath());
    4242        }
    4343
    4444        nav.toElement(VTDNav.ROOT);
    45         processResources(result, nav);
    46         processFacets(result, nav, facetMapping);
    47         return result;
     45        processResources(cmdiData, nav);
     46        processFacets(cmdiData, nav, facetMapping);
     47        return cmdiData;
    4848    }
    4949
     
    5353    }
    5454
    55     private FacetMapping getFacetMapping(VTDNav nav, String tolog) throws VTDException {
     55    /**
     56     * Extracts valid XML patterns for all facet definitions
     57     * @param nav VTD Navigator
     58     * @param cmdiFilePath Absolute path of the XML file for which nav was created
     59     * @return the facet mapping used to map meta data to facets
     60     * @throws VTDException
     61     */
     62    private FacetMapping getFacetMapping(VTDNav nav, String cmdiFilePath) throws VTDException {
    5663        String xsd = extractXsd(nav);
    5764        if (xsd == null) {
     
    5966        }
    6067        if (xsd.indexOf("http") != xsd.lastIndexOf("http")){
    61             LOG.info("FILE WITH WEIRD HTTP THINGY! " + tolog);
     68            LOG.info("No valid CMDI schema URL was extracted. This is an indication of a broken CMDI file (like false content in //MdProfile element). {}", cmdiFilePath);
    6269        }
    6370        String facetConceptsFile = MetadataImporter.config.getFacetConceptsFile();
     
    6976    }
    7077
     78    /**
     79     * Try two approaches to extract the XSD schema information from the CMDI file
     80     * @param nav VTD Navigator
     81     * @return URL of CMDI schema, or null if neither the CMDI header nor the XMLSchema-instance's attributes contained the information
     82     * @throws VTDException
     83     */
    7184    String extractXsd(VTDNav nav) throws VTDException {
    7285        String xsd = getXsdFromHeader(nav);
     
    7790    }
    7891
     92    /**
     93     * Extract XSD schema information from CMDI header (using element //Header/MdProfile)
     94     * @param nav VTD Navigator
     95     * @return URL to CMDI schema, or null if content of //Header/MdProfile element could not be read
     96     * @throws XPathParseException
     97     * @throws XPathEvalException
     98     * @throws NavException
     99     */
    79100    private String getXsdFromHeader(VTDNav nav) throws XPathParseException, XPathEvalException, NavException {
    80101        String result = null;
     
    90111    }
    91112
     113    /**
     114     * Extract XSD schema information from schemaLocation or noNamespaceSchemaLocation attributes
     115     * @param nav VTD Navigator
     116     * @return URL to CMDI schema, or null if attributes don't exist
     117     * @throws NavException
     118     */
    92119    private String getXsdFromSchemaLocation(VTDNav nav) throws NavException {
    93120        String result = null;
     
    106133    }
    107134   
    108     private void processResources(CMDIData result, VTDNav nav) throws VTDException {
     135    /**
     136     * Extract ResourceProxies from ResourceProxyList
     137     * @param cmdiData representation of the CMDI document
     138     * @param nav VTD Navigator
     139     * @throws VTDException
     140     */
     141    private void processResources(CMDIData cmdiData, VTDNav nav) throws VTDException {
    109142       
    110143        AutoPilot resourceProxy = new AutoPilot(nav);
     
    125158            if (!ref.equals("") && !type.equals("")) {
    126159                // note that the mime type could be empty
    127                 result.addResource(ref, type, mimeType);
    128             }
    129         }
    130     }
    131 
    132     private void processFacets(CMDIData result, VTDNav nav, FacetMapping facetMapping) throws VTDException {
     160                cmdiData.addResource(ref, type, mimeType);
     161            }
     162        }
     163    }
     164
     165    /**
     166     * Extracts facet values according to the facetMapping
     167     * @param cmdiData representation of the CMDI document
     168     * @param nav VTD Navigator
     169     * @param facetMapping the facet mapping used to map meta data to facets
     170     * @throws VTDException
     171     */
     172    private void processFacets(CMDIData cmdiData, VTDNav nav, FacetMapping facetMapping) throws VTDException {
    133173        List<FacetConfiguration> facetList = facetMapping.getFacets();
    134174        for (FacetConfiguration config : facetList) {
    135175            List<String> patterns = config.getPatterns();
    136176            for (String pattern : patterns) {
    137                 boolean matchedPattern = matchPattern(result, nav, config, pattern, config.getAllowMultipleValues());
     177                boolean matchedPattern = matchPattern(cmdiData, nav, config, pattern, config.getAllowMultipleValues());
    138178                if (matchedPattern && !config.getAllowMultipleValues()) {
    139179                    break;
     
    143183    }
    144184
    145     private boolean matchPattern(CMDIData result, VTDNav nav, FacetConfiguration config, String pattern, Boolean allowMultipleValues) throws VTDException {
     185    /**
     186     * Extracts content from CMDI file for a specific facet based on a single XPath expression
     187     * @param cmdiData representation of the CMDI document
     188     * @param nav VTD Navigator
     189     * @param config facet configuration
     190     * @param pattern XPath expression
     191     * @param allowMultipleValues whether multiple values are allowed in this facet
     192     * @return true if the pattern matched a node in the CMDI file
     193     * @throws VTDException
     194     */
     195    private boolean matchPattern(CMDIData cmdiData, VTDNav nav, FacetConfiguration config, String pattern, Boolean allowMultipleValues) throws VTDException {
    146196        boolean matchedPattern = false;
    147197        AutoPilot ap = new AutoPilot(nav);
     
    156206            String value = nav.toString(index);
    157207            value = postProcess(config.getName(), value);
    158             result.addDocField(config.getName(), value, config.isCaseInsensitive());
     208            cmdiData.addDocField(config.getName(), value, config.isCaseInsensitive());
    159209            index = ap.evalXPath();
    160210           
     
    165215    }
    166216
    167     private String postProcess(String name, String value) {
    168         String result = value;
    169         if (postProcessors.containsKey(name)) {
    170             PostProcessor processor = postProcessors.get(name);
    171             result = processor.process(value);
     217    /**
     218     * Applies registered PostProcessor to extracted values
     219     * @param facetName name of the facet for which value was extracted
     220     * @param extractedValue extracted value from CMDI file
     221     * @return value after applying matching PostProcessor or the original value if no PostProcessor was registered for the facet
     222     */
     223    private String postProcess(String facetName, String extractedValue) {
     224        String result = extractedValue;
     225        if (postProcessors.containsKey(facetName)) {
     226            PostProcessor processor = postProcessors.get(facetName);
     227            result = processor.process(extractedValue);
    172228        }
    173229        return result.trim();
    174230    }
    175 
    176231}
Note: See TracChangeset for help on using the changeset viewer.