Changeset 6287


Ignore:
Timestamp:
06/12/15 07:35:11 (9 years ago)
Author:
Twan Goosen
Message:

Added fields with part count and resource count for further ranking
Refs #750

Location:
vlo/trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/CountryNamePostProcessor.java

    r5982 r6287  
    33import eu.clarin.cmdi.vlo.CommonUtils;
    44import java.util.ArrayList;
     5import java.util.HashMap;
    56import java.util.List;
    67import java.util.Map;
     
    1516
    1617    /**
    17      * Returns the country name based on the mapping defined in the CMDI component:
    18      * http://catalog.clarin.eu/ds/ComponentRegistry/?item=clarin.eu:cr1:c_1271859438104 If no mapping is found the original value is
    19      * returned.
     18     * Returns the country name based on the mapping defined in the CMDI
     19     * component:
     20     * http://catalog.clarin.eu/ds/ComponentRegistry/?item=clarin.eu:cr1:c_1271859438104
     21     * If no mapping is found the original value is returned.
     22     *
    2023     * @param value extracted "country" value from CMDI file
    2124     * @return List of country names
     
    4851            return result;
    4952        } catch (Exception e) {
    50             throw new RuntimeException("Cannot instantiate postProcessor:", e);
     53            if (CommonUtils.SWALLOW_LOOKUP_ERRORS) {
     54                return new HashMap<String, String>();
     55            } else {
     56                throw new RuntimeException("Cannot instantiate postProcessor:", e);
     57            }
    5158        }
    5259    }
  • vlo/trunk/vlo-importer/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java

    r6214 r6287  
    418418                    + resource.getResourceName());
    419419        }
     420        solrDocument.addField(FacetConstants.FIELD_RESOURCE_COUNT, resources.size());
    420421    }
    421422
     
    481482                    partialUpdate.put("set", incomingVertexNames);
    482483                    doc.setField(FacetConstants.FIELD_HAS_PART, partialUpdate);
     484                    doc.setField(FacetConstants.FIELD_HAS_PART_COUNT, incomingVertexNames.size());
    483485                }
    484486               
  • vlo/trunk/vlo-solr/src/main/solr_conf/solr/collection1/conf/schema.xml

    r6284 r6287  
    442442   <field name="_fileName" type="string" indexed="true" stored="true" multiValued="false" />
    443443   <field name="_resourceRef" type="string" indexed="true" stored="true" multiValued="true" />
     444   <field name="_resourceRefCount" type="int" indexed="true" stored="true" multiValued="false" />
    444445   <field name="_componentProfile" type="string" indexed="true" stored="true" multiValued="false" />
    445446   <field name="_contentSearchRef" type="string" indexed="true" stored="true" multiValued="true" />
     
    449450   <field name="_isPartOf" type="string" indexed="true" stored="true" multiValued="true" />
    450451   <field name="_hasPart" type="string" indexed="true" stored="true" multiValued="true" />
     452   <field name="_hasPartCount" type="int" indexed="true" stored="true" multiValued="false" />
    451453   <field name="_hierarchyWeight" type="int" indexed="true" stored="true" multiValued="false" default='0'/>
    452454   <field name="metadataSource" type="metadataSource" indexed="false" stored="true" multiValued="true" />
  • vlo/trunk/vlo-solr/src/main/solr_conf/solr/collection1/conf/solrconfig.xml

    r6284 r6287  
    384384        <!-- Boost fields (boost based on field value regardless of query) -->
    385385        <str name="bf">
    386             <!-- boost by hierarchy weight (reverse order because lower numbers indicate a higher level -->
    387             rord(_hierarchyWeight)
    388            <!-- TODO: use bf to boost records with one or more resources? -->
     386            <!--
     387                boost by hierarchy weight (reverse order because lower numbers
     388                indicate a higher level), then by number of parts; boosting
     389                by number of resources is not implemented yet -->
     390            rord(_hierarchyWeight) log(add(1,_hasPartCount))^.1 <!-- todo: deal with -infinity -->
     391            <!-- TODO: prefer CLARIN records over non-CLARIN? -->
     392            <!-- TODO: boost records with one or more resources (_resourceRefCount) -->
    389393        </str>
    390394       
Note: See TracChangeset for help on using the changeset viewer.