Changeset 266


Ignore:
Timestamp:
03/22/10 11:02:26 (14 years ago)
Author:
dietuyt
Message:

Changes from INL to CLARIN-NL components

Location:
metadata/trunk/toolkit/components/clarin-nl
Files:
19 added
5 deleted
5 edited

Legend:

Unmodified
Added
Removed
  • metadata/trunk/toolkit/components/clarin-nl/collection/cmdi-corpus.xml

    r200 r266  
    11<?xml version="1.0" encoding="UTF-8"?>
    22<CMD_ComponentSpec xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    3     xsi:schemaLocation="http://www.clarin.eu/cmd http://www.clarin.eu/cmd/general-component-schema.xsd" isProfile="true">
     3    xsi:schemaLocation="http://www.clarin.eu/cmd http://www.clarin.eu/cmd/general-component-schema.xsd" isProfile="false">
    44
    5     <Header></Header>
    6     <!-- belangrijk: maximaal 1 corpus per instantiatie -->
    7     <CMD_Component name="corpus" CardinalityMax="1">
     5    <Header>
     6        <Name>Corpus</Name>
     7        <Description> A component for metadata that is characteristic for corpora, e.g. information about the (number of) languages included, information about the validation of the corpus,..</Description>
     8    </Header>
     9
     10    <CMD_Component name="Corpus" CardinalityMax="1">
    811       
    9         <CMD_Element name="multilinguality">
     12        <CMD_Element name="Multilinguality" ConceptLink="http://www.isocat.org/datcat/DC-2491" CardinalityMax="1">
    1013            <ValueScheme>
    1114                <enumeration>
     
    1619            </ValueScheme>
    1720        </CMD_Element>
    18        
    19         <!-- eg a corpus of travel planning prompts  -->
    20         <CMD_Element name="Topic" CardinalityMin="0" CardinalityMax="unbounded" ValueScheme="string"/>
    21        
    22         <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/common/component-size.xml"/>
    23         <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/common/component-subjectlanguage.xml"/>
    24         <CMD_Component filename="component-corpusmodality.xml"/>
    25         <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/common/component-validation.xml"/>
     21
     22        <CMD_Element name="Topic" ValueScheme="string" ConceptLink="http://www.isocat.org/datcat/DC-2503" CardinalityMin="0"/>
     23        <CMD_Component name="AnnotationType" CardinalityMax="unbounded"/>
     24        <!-- the component Size contains information about the total size of the corpus and if more than one language is included in the resource, about the size per language. e.g. number of words -->
     25        <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/common/cmdi-size.xml" CardinalityMax="1"/>
     26        <!-- within the component Corpus the component SubjectLanguages identifies the language(s) included in the corpus and indicates which language is the dominant language, the source language and/or the target language-->
     27        <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/common/cmdi-subjectlanguages.xml" CardinalityMax="1"/>
     28        <!-- the component CorpusModality contains information about the modalities that are contained in the corpus. e.g. speech, signs, facial expressions,... -->
     29        <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/collection/cmdi-corpusmodality.xml" CardinalityMax="unbounded"/>
     30        <!-- within the component Corpus the component Validation contains information about the validation mode, style and level of the corpus-->
     31        <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/common/cmdi-validation.xml" CardinalityMin="0" CardinalityMax="1"/>
    2632               
    2733    </CMD_Component>
  • metadata/trunk/toolkit/components/clarin-nl/collection/cmdi-speech-corpus.xml

    r186 r266  
    55        <ID></ID>
    66        <Name>Speech Corpus</Name>
    7         <Description>Speech Corpus component</Description>
     7        <Description>A component for metadata which is characteristic for a speech corpus, like the number of speakers included in the corpus, information about the recording environments,...</Description>
    88    </Header>
     9
    910    <CMD_Component name="SpeechCorpus" CardinalityMax="1">
    10 
    11         <!-- in hours -->
     11     
    1212        <CMD_Element name="DurationOfEffectiveSpeech"
    13             ConceptLink="http://www.isocat.org/datcat/DC-2691" ValueScheme="decimal"/>
     13            ConceptLink="http://www.isocat.org/datcat/DC-2691" ValueScheme="string" CardinalityMin="0" CardinalityMax="1"/>
    1414        <CMD_Element name="DurationOfFullDatabase"
    15             ConceptLink="http://www.isocat.org/datcat/DC-2690" ValueScheme="decimal"/>
    16         <CMD_Element name="NumberOfSpeakers" ValueScheme="decimal"/>
    17 
    18 
     15            ConceptLink="http://www.isocat.org/datcat/DC-2690" ValueScheme="string" CardinalityMax="1"/>
     16        <CMD_Element name="NumberOfSpeakers" ValueScheme="decimal" ConceptLink="http://www.isocat.org/datcat/DC-2692" CardinalityMax="1"/>
    1917        <CMD_Element name="RecordingEnvironment" ValueScheme="string"
    20             ConceptLink="http://www.isocat.org/datcat/DC-2696">
     18            ConceptLink="http://www.isocat.org/datcat/DC-2696" CardinalityMax="unbounded">
    2119            <ValueScheme>
    2220                <enumeration>
    2321                    <item>home/office</item>
    24                     <item>studio</item>
    25                     <item>vehicle</item>
    26                     <item>public place</item>
    27                     <item>telephone</item>
     22                    <item ConceptLink="http://www.isocat.org/datcat/DC-2698">studio</item>
     23                    <item ConceptLink="http://www.isocat.org/datcat/DC-2699">vehicle</item>
     24                    <item ConceptLink="http://www.isocat.org/datcat/DC-2700">public place</item>
     25                    <item ConceptLink="http://www.isocat.org/datcat/DC-2596">telephone</item>
     26                    <item ConceptLink="http://www.isocat.org/datcat/DC-2599">other</item>
    2827                </enumeration>
    2928            </ValueScheme>
    3029        </CMD_Element>
    31        
     30        <CMD_Element name="SpeakerDemographics" ValueScheme="string" CardinalityMax="1"/>
    3231
    33         <!-- nr of speakers per sex/age class/dialect/socio-economic status etc -->
    34         <CMD_Element name="SpeakerDemographics" ValueScheme="string"/>
    3532
    36         <CMD_Element name="AnnotationLevelType" ConceptLink="http://www.isocat.org/datcat/DC-2462">
     33
     34        <CMD_Element name="Quality" CardinalityMin="0" CardinalityMax="unbounded" ConceptLink="http://www.isocat.org/datcat/DC-2574" >
    3735            <ValueScheme>
    3836                <enumeration>
    39                     <item>orthography</item>
    40                     <item>non-speech events</item>
    41                     <item>morphology</item>
    42                     <item>syntax</item>
    43                     <item>semantics</item>
    44                     <item>phonetic</item>
    45                     <item>prosody</item>
    46                     <item>segments</item>
    47                     <item>speaker turn</item>
    48                     <item>topic</item>
    49                     <item>sound quality</item>
    50                     <!-- new in isocat -->
    51                     <item>gaze direction</item>
    52                     <!-- check UPF suggestions -->
    53                 </enumeration>
    54             </ValueScheme>
     37                    <item ConceptLink="http://www.isocat.org/datcat/DC-2666">1</item>
     38                    <item ConceptLink="http://www.isocat.org/datcat/DC-2667">2</item>
     39                    <item ConceptLink="http://www.isocat.org/datcat/DC-2668">3</item>
     40                    <item ConceptLink="http://www.isocat.org/datcat/DC-2669">4</item>
     41                    <item ConceptLink="http://www.isocat.org/datcat/DC-2670">5</item>
     42                    <item ConceptLink="http://www.isocat.org/datcat/DC-2591">Unknown</item>
     43                    <item ConceptLink="http://www.isocat.org/datcat/DC-2592">Unspecified</item>
     44                </enumeration>               
     45           </ValueScheme>
    5546        </CMD_Element>
     47        <CMD_Element name="RecordingPlatformHardware" ValueScheme="string" ConceptLink="http://www.isocat.org/datcat/DC-2693" CardinalityMin="0" CardinalityMax="unbounded"/>
     48        <CMD_Element name="RecordingPlatformSoftware" ValueScheme="string" ConceptLink="http://www.isocat.org/datcat/DC-2694" CardinalityMin="0" CardinalityMax="unbounded"/>
    5649
    57         <CMD_Element name="Quality" ValueScheme="string" CardinalityMin="0"/>
    58         <CMD_Element name="RecordingPlatformHardware" ValueScheme="string"/>
    59         <CMD_Element name="RecordingPlatformSoftware" ValueScheme="string"/>
    60 
    61        
    62         <CMD_Component filename="cmdi-speech-technical.xml"/>
    63 
    64         <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/common/component-validation.xml"/>
     50        <!-- within the component SpeechCorpus the component SpeechTechnical contains technical information about the entire speech corpus like the sampling frequency, the compression,... -->
     51        <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/collection/cmdi-speech-technical.xml" CardinalityMax="unbounded"/>
    6552
    6653    </CMD_Component>
  • metadata/trunk/toolkit/components/clarin-nl/collection/cmdi-speech-technical.xml

    r233 r266  
    11<?xml version="1.0" encoding="UTF-8"?>
    22<CMD_ComponentSpec xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    3     xsi:schemaLocation="http://www.clarin.eu/cmd http://www.clarin.eu/cmd/general-component-schema.xsd">
     3                   xsi:schemaLocation="http://www.clarin.eu/cmd http://www.clarin.eu/cmd/general-component-schema.xsd"
     4                   isProfile="false">
    45    <Header>
    5         <ID></ID>
     6        <ID/>
    67        <Name>Speech Technical Metadata</Name>
    7         <Description>Speech Technical Metadata component</Description>
     8        <Description>A component for technical information about an entire speech corpus or about a single recording, e.g. the sampling frequency, the compression,...</Description>
    89    </Header>
    910   
    10         <!-- in aparte file zetten en includen! + evt splitsen in verschillende (zie Daan) -->
    11         <CMD_Component name="SpeechTechnicalMetadata" CardinalityMin="0" CardinalityMax="unbounded">
    1211
    13             <CMD_Element name="SamplingFrequency" ValueScheme="decimal"/>
    14             <CMD_Element name="NumberOfChannels" ValueScheme="decimal"/>
    15             <CMD_Element name="ByteOrder">
     12        <CMD_Component name="SpeechTechnicalMetadata">
     13
     14            <CMD_Element name="SamplingFrequency" ValueScheme="decimal" CardinalityMin="0"
     15                   CardinalityMax="unbounded"/>
     16            <CMD_Element name="NumberOfChannels" ValueScheme="decimal" CardinalityMin="0"
     17                   CardinalityMax="unbounded"/>
     18            <CMD_Element name="ByteOrder" ConceptLink="http://www.isocat.org/datcat/DC-2686"
     19                   CardinalityMin="0"
     20                   CardinalityMax="unbounded">
    1621                <ValueScheme>
    1722                    <enumeration>
    18                         <item>big endian</item>
    19                         <item>little endian</item>
     23                        <item ConceptLink="http://www.isocat.org/datcat/DC-2687">big endian</item>
     24                        <item ConceptLink="http://www.isocat.org/datcat/DC-2688">little endian</item>
    2025                    </enumeration>
    2126                </ValueScheme>
    2227            </CMD_Element>
    23             <CMD_Element name="Compression">
     28            <CMD_Element name="Compression" ConceptLink="http://www.isocat.org/datcat/DC-2685"
     29                   CardinalityMin="0"
     30                   CardinalityMax="unbounded">
    2431                <ValueScheme>
    2532                    <enumeration>
     
    3037                </ValueScheme>
    3138            </CMD_Element>
    32             <CMD_Element name="BitResolution" ValueScheme="decimal"/>
    33            
    34             <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin/cmdi-mimetype.xml"/>
    35            
     39            <CMD_Element name="BitResolution" ValueScheme="decimal"
     40                   ConceptLink="http://www.isocat.org/datcat/DC-2684"
     41                   CardinalityMin="0"
     42                   CardinalityMax="unbounded"/>
     43            <!-- within the component SpeechTechnical the component MimeType specifies the mime-type of the corpus, which is a formalized specifier for the format included.-->
     44            <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin/cmdi-mimetype.xml"
     45                     CardinalityMin="0"
     46                     CardinalityMax="unbounded"/>
     47
    3648        </CMD_Component>
    3749
  • metadata/trunk/toolkit/components/clarin-nl/collection/cmdi-text-corpus.xml

    r186 r266  
    55        <ID></ID>
    66        <Name>Text Corpus</Name>
    7         <Description>Text Corpus component</Description>
     7        <Description>A component for metadata which is characteristic for a text corpus</Description>
    88    </Header>
    9     <CMD_Component>
    10        
    11         <!-- where do the texts come from? eg newspaper, internet, etc -->
    12         <CMD_Element name="origin" ValueScheme="string"/>   
    13        
    14         <!-- in aparte file zetten en includen!  -->
    15         <CMD_Component filename="cmdi-text-technical.xml"/>
    16        
    17         <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/common/component-validation.xml"/>
     9   
     10    <CMD_Component name="TextCorpus" CardinalityMax="1">
     11             
     12        <CMD_Element name="Origin" ValueScheme="string" CardinalityMax="unbounded"/>   
     13        <!-- the component TextTechnical contains technical information about the text corpus, like the mime type and the character encoding -->
     14        <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin-nl/collection/cmdi-text-technical.xml" CardinalityMax="1"/>
    1815       
    1916    </CMD_Component>
  • metadata/trunk/toolkit/components/clarin-nl/collection/cmdi-text-technical.xml

    r233 r266  
    66        <ID/>
    77        <Name>Text Technical Metadata</Name>
    8         <Description>Text Technical Metadata component</Description>
     8        <Description>A component for technical information about a text corpus, like the mime type and the character encoding of the texts</Description>
    99    </Header>
    10 
    11     <CMD_Component name="TextTechnicalMetadata" CardinalityMin="0" CardinalityMax="unbounded">
    12         <CMD_Element name="CharacterEncoding" ValueScheme="string" ConceptLink="http://www.isocat.org/datcat/DC-2564"/>
    13         <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin/cmdi-mimetype.xml"/>
     10 
     11    <CMD_Component name="TextTechnical">
     12        <CMD_Element name="CharacterEncoding" ValueScheme="string" ConceptLink="http://www.isocat.org/datcat/DC-2564" CardinalityMax="unbounded"/>
     13        <!-- within the component TextTechnical the component MimeType specifies the mime-type of the corpus, which is a formalized specifier for the format included.-->
     14        <CMD_Component filename="http://www.clarin.eu/cmd/components/clarin/cmdi-mimetype.xml" CardinalityMax="unbounded"/>
    1415    </CMD_Component>
    1516
Note: See TracChangeset for help on using the changeset viewer.