Ignore:
Timestamp:
10/30/15 17:19:52 (9 years ago)
Author:
davor.ostojic@oeaw.ac.at
Message:

cross-mapping
vocabulary-entry instead of invertedMap

File:
1 edited

Legend:

Unmodified
Added
Removed
  • vlo/branches/vlo-3.3-oeaw/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/CMDIParserVTDXML.java

    r6413 r6715  
    11package eu.clarin.cmdi.vlo.importer;
     2
     3import java.io.File;
     4import java.io.FileInputStream;
     5import java.io.IOException;
     6import java.net.URI;
     7import java.util.ArrayList;
     8import java.util.Arrays;
     9import java.util.HashMap;
     10import java.util.LinkedList;
     11import java.util.List;
     12import java.util.Map;
     13import java.util.Map.Entry;
     14import java.util.regex.Matcher;
     15import java.util.regex.Pattern;
     16
     17import org.apache.commons.io.IOUtils;
     18import org.slf4j.Logger;
     19import org.slf4j.LoggerFactory;
    220
    321import com.ximpleware.AutoPilot;
     
    826import com.ximpleware.XPathEvalException;
    927import com.ximpleware.XPathParseException;
     28
    1029import eu.clarin.cmdi.vlo.FacetConstants;
    11 import java.io.File;
    12 import java.io.FileInputStream;
    13 import java.io.IOException;
    14 import java.net.URI;
    15 import java.util.ArrayList;
    16 import java.util.List;
    17 import java.util.Map;
    18 import java.util.regex.Matcher;
    19 import java.util.regex.Pattern;
    20 import org.apache.commons.io.IOUtils;
    21 import org.slf4j.Logger;
    22 import org.slf4j.LoggerFactory;
    2330
    2431public class CMDIParserVTDXML implements CMDIDataProcessor {
     
    231238     */
    232239    private void processFacets(CMDIData cmdiData, VTDNav nav, FacetMapping facetMapping) throws VTDException {
    233         List<FacetConfiguration> facetList = facetMapping.getFacets();
     240               
     241        List<FacetConfiguration> facetList = facetMapping.getFacets();         
    234242        for (FacetConfiguration config : facetList) {
    235243            boolean matchedPattern = false;
     
    289297                continue;
    290298            }
    291 
     299           
    292300            final List<String> values = postProcess(config.getName(), value);
    293             insertFacetValues(config.getName(), values, cmdiData, languageCode, allowMultipleValues, config.isCaseInsensitive());
     301           
     302            insertFacetValues(config.getName(), values, cmdiData, languageCode, allowMultipleValues, config.isCaseInsensitive(), true);
     303           
     304            crossMap(config, value, cmdiData, languageCode);
    294305           
    295306            //add also non curated resource type
     
    304315                    derivedValues.addAll(postProcess(derivedFacet, postProcessedValue));
    305316                }
    306                 insertFacetValues(derivedFacet, derivedValues, cmdiData, languageCode, allowMultipleValues, config.isCaseInsensitive());
     317                insertFacetValues(derivedFacet, derivedValues, cmdiData, languageCode, allowMultipleValues, config.isCaseInsensitive(), true);
    307318            }
    308319
     
    330341    }
    331342
    332     private void insertFacetValues(String name, List<String> valueList, CMDIData cmdiData, String languageCode, boolean allowMultipleValues, boolean caseInsensitive) {
     343   
     344    /*
     345         * Adds values to a facet, whether they come from MD fields or from cross mapping.
     346         * Priority is given to the values from MD fields: they will always be at the beginning of the list, and in case
     347         * the facet doesn't allow multiple values and we already had a value from cross mapping, that value will be overridden.
     348         *
     349         */
     350    private void insertFacetValues(String name, List<String> valueList, CMDIData cmdiData, String languageCode, boolean allowMultipleValues, boolean caseInsensitive, boolean comesFromConceptMapping) {
     351       
     352        //keep only values from original concepts, not from cross mappings
     353                if(comesFromConceptMapping && !allowMultipleValues && cmdiData.getSolrDocument().containsKey(name)){
     354                        cmdiData.getSolrDocument().remove(name);
     355                }
     356               
     357                if(!comesFromConceptMapping && !allowMultipleValues && cmdiData.getSolrDocument().containsKey(name))
     358                        return;
     359       
    333360        for (int i = 0; i < valueList.size(); i++) {
    334361            if (!allowMultipleValues && i > 0) {
     
    361388        return resultList;
    362389    }
     390   
     391    private void crossMap(FacetConfiguration config, String extractedValue, CMDIData cmdiData, String languageCode){
     392       
     393        if (postProcessors.containsKey(config.getName())){
     394            PostProcessor processor = postProcessors.get(config.getName());
     395            if(processor instanceof PostProcessorsWithVocabularyMap){
     396               
     397                List<String> facetNames = MetadataImporter.config.getAllFacetFields();
     398               
     399                Map<String, String> crossMap = ((PostProcessorsWithVocabularyMap) processor).getCrossMappings(extractedValue);
     400                for(Entry e: crossMap.entrySet()){
     401                        String toFacet = (String) e.getKey();
     402                        String value = (String) e.getValue();
     403                        for(String facetName: facetNames){
     404                                if(toFacet.toLowerCase().equals(facetName.toLowerCase())){//normalize facet name, map can contain it in any case
     405                                        insertFacetValues(facetName, Arrays.asList(value), cmdiData, languageCode, config.getAllowMultipleValues(), config.isCaseInsensitive(), false);
     406                                }
     407                        }
     408                }
     409            }
     410        }
     411    }
     412   
    363413}
Note: See TracChangeset for help on using the changeset viewer.