Changeset 4016


Ignore:
Timestamp:
11/13/13 14:24:31 (11 years ago)
Author:
teckart
Message:

Added a simple blacklisting approach based on filtering XPath substrings. This needs <blacklistPattern> elements in the facetConcept configuration; all extracted XPath expressions that contain one of these blacklisted substrings are removed.

Location:
vlo/trunk/vlo_importer/src/main/java/eu/clarin/cmdi/vlo/importer
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo_importer/src/main/java/eu/clarin/cmdi/vlo/importer/FacetConceptMapping.java

    r3933 r4016  
    9797        private List<String> patterns = new ArrayList<String>();
    9898       
     99        @XmlElement(name = "blacklistPattern")
     100        private List<String> blacklistPatterns = new ArrayList<String>();
     101       
    99102        public void setConcepts(List<String> concepts) {
    100103            this.concepts = concepts;
     
    171174        public List<String> getPatterns() {
    172175            return patterns;
     176        }
     177       
     178        public void setBlacklistPatterns(List<String> blacklistPatterns) {
     179            this.blacklistPatterns = blacklistPatterns;
     180        }
     181
     182        public List<String> getBlacklistPatterns() {
     183            return blacklistPatterns;
    173184        }
    174185
  • vlo/trunk/vlo_importer/src/main/java/eu/clarin/cmdi/vlo/importer/FacetMappingFactory.java

    r4003 r4016  
    131131                    }
    132132                }
     133               
     134                //add hardcoded patterns only when there is no xpath generated from conceptlink
    133135                if (xpaths.isEmpty()) {
    134                     //add hardcoded patterns only when there is no xpath generated from conceptlink
    135136                    xpaths.addAll(facetConcept.getPatterns());
    136137                }
     138               
     139                // pattern-based blacklisting: remove all XPath expressions that contain a blacklisted substring;
     140                // this is basically a hack to enhance the quality of the visualised information in the VLO;
     141                // should be replaced by a more intelligent approach in the future
     142                for(String blacklistPattern : facetConcept.getBlacklistPatterns()) {
     143                        Iterator<String> xpathIterator = xpaths.iterator();
     144                        while(xpathIterator.hasNext()) {
     145                                String xpath = xpathIterator.next();
     146                                if(xpath.contains(blacklistPattern)) {
     147                                        LOG.debug("Rejecting "+xpath+" because of blacklisted substring "+blacklistPattern);
     148                                        xpathIterator.remove();
     149                                }
     150                        }
     151                }               
     152               
    137153                config.setCaseInsensitive(facetConcept.isCaseInsensitive());
    138154                config.setAllowMultipleValues(facetConcept.isAllowMultipleValues());
Note: See TracChangeset for help on using the changeset viewer.