Changeset 1936


Ignore:
Timestamp:
05/15/12 13:35:42 (12 years ago)
Author:
herste
Message:

Started adding code for the "type of thing described", based on the CMDI profile used.

Location:
vlo/trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • vlo/trunk/vlo_solr/src/main/solr_conf/solr/conf/schema.xml

    r1784 r1936  
    1 <?xml version="1.0" encoding="UTF-8" ?>
     1<?xml version="1.0" encoding="UTF-8" ?>
    22<!--
    33 Licensed to the Apache Software Foundation (ASF) under one or more
     
    1717-->
    1818
    19 <!-- 
     19<!--
    2020 This is the Solr schema file. This file should be named "schema.xml" and
    2121 should be in the conf directory under the solr home
    22  (i.e. ./solr/conf/schema.xml by default) 
     22 (i.e. ./solr/conf/schema.xml by default)
    2323 or located where the classloader for the Solr webapp can find it.
    2424
     
    5252       not normally be changed by applications.
    5353       1.0: multiValued attribute did not exist, all fields are multiValued by nature
    54        1.1: multiValued attribute introduced, false by default 
     54       1.1: multiValued attribute introduced, false by default
    5555       1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
    5656     -->
     
    6565    -->
    6666
    67     <!-- The StrField type is not analyzed, but indexed/stored verbatim. 
     67    <!-- The StrField type is not analyzed, but indexed/stored verbatim.
    6868       - StrField and TextField support an optional compressThreshold which
    6969       limits compression (if enabled in the derived fields) to values which
     
    9191         then default lucene sorting will be used which places docs without the
    9292         field first in an ascending sort and last in a descending sort.
    93     -->   
     93    -->
    9494
    9595    <!--
     
    118118    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
    119119         is a more restricted form of the canonical representation of dateTime
    120          http://www.w3.org/TR/xmlschema-2/#dateTime   
     120         http://www.w3.org/TR/xmlschema-2/#dateTime
    121121         The trailing "Z" designates UTC time and is mandatory.
    122122         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
     
    133133                  ... 6 months and 3 days in the future from the start of
    134134                      the current day
    135                      
     135
    136136         Consult the DateField javadocs for more information.
    137137
     
    178178    <!-- The "RandomSortField" is not used to store or search any
    179179         data.  You can declare fields of this type in your schema
    180          to generate pseudo-random orderings of your docs for sorting 
    181          purposes.  The ordering is generated based on the field name 
     180         to generate pseudo-random orderings of your docs for sorting
     181         purposes.  The ordering is generated based on the field name
    182182         and the version of the index. As long as the index version
    183183         remains unchanged, and the same field name is reused,
    184          the ordering of the docs will be consistent. 
     184         the ordering of the docs will be consistent.
    185185         If you want different pseudo-random orderings of documents,
    186186         for the same version of the index, use a dynamicField and
     
    294294
    295295    <!-- A general unstemmed text field that indexes tokens normally and also
    296          reversed (via ReversedWildcardFilterFactory), to enable more efficient 
     296         reversed (via ReversedWildcardFilterFactory), to enable more efficient
    297297         leading wildcard queries. -->
    298298    <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
     
    346346        <!-- The PatternReplaceFilter gives you the flexibility to use
    347347             Java Regular expression to replace any sequence of characters
    348              matching a pattern with an arbitrary replacement string, 
     348             matching a pattern with an arbitrary replacement string,
    349349             which may include back references to portions of the original
    350350             string matched by the pattern.
    351              
     351
    352352             See the Java Regular Expression documentation for more
    353353             information on pattern and replacement string syntax.
    354              
     354
    355355             http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
    356356          -->
     
    360360      </analyzer>
    361361    </fieldType>
    362    
     362
    363363    <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
    364364      <analyzer>
     
    374374        The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
    375375        a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
    376         Attributes of the DelimitedPayloadTokenFilterFactory : 
     376        Attributes of the DelimitedPayloadTokenFilterFactory :
    377377         "delimiter" - a one character delimiter. Default is | (pipe)
    378378         "encoder" - how to encode the following value into a payload
     
    396396
    397397    <!-- since fields of this type are by default not stored or indexed,
    398          any data added to them will be ignored outright.  --> 
    399     <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> 
     398         any data added to them will be ignored outright.  -->
     399    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
    400400
    401401 </types>
     
    437437   <field name="_fileName" type="string" indexed="true" stored="true" multiValued="false" />
    438438   <field name="_resourceRef" type="string" indexed="true" stored="true" multiValued="true" />
     439   <field name="_componentProfile" type="string" indexed="true" stored="true" multiValued="false" />
    439440   <field name="metadataSource" type="metadataSource" indexed="false" stored="true" multiValued="true" />
    440441   <field name="languages" type="string" indexed="true" stored="true" multiValued="true" />
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/FacetConstants.java

    r1827 r1936  
    2424    public static final String FIELD_FILENAME = "_fileName";
    2525    public static final String FIELD_RESOURCE = "_resourceRef";
     26    public static final String FIELD_CLARIN_PROFILE = "_componentProfile";
    2627
    2728    // PREFIX URL for the language-link
  • vlo/trunk/vlo_webapp/src/main/java/eu/clarin/cmdi/vlo/importer/MetadataImporter.java

    r1854 r1936  
    4242        POST_PROCESSORS.put(FacetConstants.FIELD_LANGUAGES, new LanguageLinkPostProcessor());
    4343        POST_PROCESSORS.put(FacetConstants.FIELD_NATIONAL_PROJECT, new NationalProjectPostProcessor());
     44        // TODO: Define tools post-processor and create input file and so on. Maybe do this dynamically?
    4445    }
    4546
     
    232233        metadataSourceUrl += file.getAbsolutePath().substring(dataOrigin.getTostrip().length());
    233234
    234         solrDocument.addField(FacetConstants.FIELD_COMPLETE_METADATA, metadataSourceUrl); // TODO: add the contents of the metadata file here
     235        solrDocument.addField(FacetConstants.FIELD_COMPLETE_METADATA, metadataSourceUrl);
    235236
    236237        addResourceData(solrDocument, cmdiData);
  • vlo/trunk/vlo_webapp/src/main/resources/facetConcepts.xml

    r1786 r1936  
    7171    <pattern>/c:CMD/c:Header/c:MdCollectionDisplayName/text()</pattern>
    7272  </facetConcept>
     73    <facetConcept name="_componentProfile">
     74        <pattern>/c:CMD/c:Header/c:MdProfile/text()</pattern>
     75    </facetConcept>
    7376</facetConcepts>
Note: See TracChangeset for help on using the changeset viewer.