wiki:CMD2RDF/Queries

SPARQL Queries for CMD2RDF

CMDM - Model

Profiles and Components - Schemas

List all profiles with their id and number of instances:

# List every class declared as a subclass of cmdm:Profile together with its
# dcterms:identifier and the number of resources typed with that class,
# most frequently instantiated profiles first.
#
# rdfs: is declared explicitly so the query does not rely on an endpoint's
# predefined namespace table.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX cmdm: <http://www.clarin.eu/cmd/general.rdf#>
PREFIX dcterms: <http://purl.org/dc/terms/>

# ?p and ?pid are the grouping keys, so they can be projected directly;
# only the instance count needs an aggregate.
SELECT (?p AS ?profile) ?pid (COUNT(?i) AS ?count)
WHERE {
  ?p rdfs:subClassOf cmdm:Profile .
  ?p dcterms:identifier ?pid .
  ?i a ?p .
}
GROUP BY ?p ?pid
ORDER BY DESC(?count)

result in clarin.aac.ac.at-virtuoso, just profiles

List the cmdm:contains relation pairs, grouped by type with count:

# Count cmdm:contains links, grouped by the rdf:type of the containing
# resource (PARENT) and the rdf:type of the contained resource (CHILD).
PREFIX cmdm: <http://www.clarin.eu/cmd/general.rdf#>

SELECT (SAMPLE(?containerType) AS ?PARENT)
       (SAMPLE(?memberType) AS ?CHILD)
       (COUNT(?member) AS ?COUNT)
WHERE {
  ?container cmdm:contains ?member ;
             a ?containerType .
  ?member a ?memberType .
}
GROUP BY ?containerType ?memberType
ORDER BY ?containerType ?memberType

result

Inspect data categories

List all used datcats, with counts:

# For each value of dcr:datcat, count the subjects that carry it,
# most frequently used data category first.
PREFIX dcr: <http://www.isocat.org/ns/dcr.rdf#>

# ?datcat is the grouping key and is projected directly; the aggregate is
# written in the parenthesised (expr AS ?var) form required by SPARQL 1.1.
SELECT ?datcat (COUNT(?e) AS ?count)
WHERE {
  ?e dcr:datcat ?datcat .
}
GROUP BY ?datcat
ORDER BY DESC(?count)

List all used datcats, with English labels and counts:

# For each dcr:datcat value that has an English rdfs:label attached to one of
# its subjects, list the value, the label, and the number of subjects that
# reference the data category, most frequently used first.
#
# rdfs: is declared explicitly so the query does not rely on an endpoint's
# predefined namespace table.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcr: <http://www.isocat.org/ns/dcr.rdf#>

SELECT ?datcatid ?datcatlabel (COUNT(?e) AS ?count)
WHERE {
  # ?e: any subject referencing the data category (this is what is counted).
  ?e dcr:datcat ?datcatid .
  # ?datcat: a subject with the same dcr:datcat value that carries the label.
  # NOTE(review): if several labelled subjects share one datcat value and
  # label, each ?e is counted once per such subject - confirm this is intended.
  ?datcat dcr:datcat ?datcatid .
  ?datcat rdfs:label ?datcatlabel .
  FILTER(langMatches(lang(?datcatlabel), "EN"))
}
GROUP BY ?datcatid ?datcatlabel
ORDER BY DESC(?count)

datcat with label and counts

List all relations from the relation registry:

# Return every triple stored in the two relation-registry named graphs
# (rr-cmdi and rr-dc) as subject / predicate / object.
SELECT ?dc1 ?reltype ?dc2
WHERE {
  # Enumerate the graphs once instead of repeating the pattern per graph.
  VALUES ?registryGraph {
    <http://www.clarin.eu/cmd/rr-cmdi.rdf>
    <http://www.clarin.eu/cmd/rr-dc.rdf>
  }
  GRAPH ?registryGraph { ?dc1 ?reltype ?dc2 }
}

List values in fields for given data category (DC-2544 resourceTitle):

# List, in sorted order, every literal value attached to instances of the
# classes whose dcr:datcat is the string "http://www.isocat.org/datcat/DC-2544".
PREFIX dcr: <http://www.isocat.org/ns/dcr.rdf#>

# The rename is written in the parenthesised (expr AS ?var) form required by
# SPARQL 1.1.
SELECT (?v AS ?resourcetitle)
WHERE {
  # The data category is matched as a plain string literal, not as an IRI.
  ?cmdelem dcr:datcat "http://www.isocat.org/datcat/DC-2544" .
  ?instelem a ?cmdelem .
  # Any predicate is accepted; the FILTER keeps only literal objects.
  ?instelem ?p ?v .
  FILTER isLiteral(?v)
}
ORDER BY ?v

Datcats with the number of elements they are referenced by and the number of corresponding instances:

# For each dcr:datcat value, report how many distinct classes reference it
# (?count) and how many instances of those classes exist (?count_instances),
# ordered by instance count, highest first.
PREFIX dcr: <http://www.isocat.org/ns/dcr.rdf#>

# ?datcatid is the grouping key and is projected directly; both aggregates use
# the parenthesised (expr AS ?var) form required by SPARQL 1.1.
SELECT ?datcatid
       (COUNT(DISTINCT ?cmdelem) AS ?count)
       (COUNT(?instelem) AS ?count_instances)
WHERE {
  ?cmdelem dcr:datcat ?datcatid .
  ?instelem a ?cmdelem .
}
GROUP BY ?datcatid
ORDER BY DESC(?count_instances)

Inspect instance data

Language

List distinct used LanguageNames:

# List each distinct value of Language.hasLanguageNameElementValue together
# with the number of times it occurs.
PREFIX cmd-lang: <http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/components/clarin.eu:cr1:c_1271859438111/rdf#>

# Both result columns are named and written in the parenthesised
# (expr AS ?var) form required by SPARQL 1.1; ?languageName is the grouping key.
SELECT ?languageName (COUNT(?languageName) AS ?count)
WHERE {
  [] cmd-lang:Language.hasLanguageNameElementValue ?languageName .
}
GROUP BY ?languageName

result

List distinct LangNames and corresponding LangCodes, ordered by frequency:

# List distinct pairs of language name and ISO 639-3 code value that occur
# under the same containing resource, with the number of occurrences of each
# pair, most frequent first.
PREFIX cmdm: <http://www.clarin.eu/cmd/general.rdf#>
PREFIX cmd-lang: <http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/components/clarin.eu:cr1:c_1271859438111/rdf#>
PREFIX cmd-iso639: <http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/components/clarin.eu:cr1:c_1271859438110/rdf#>

# The name-element node is called ?langnameelem so that it does not clash with
# the ?langname result column: SPARQL 1.1 forbids (expr AS ?x) when ?x is
# already bound in the WHERE clause.
SELECT (?langnamevalue AS ?langname)
       (?iso639codevalue AS ?langcode)
       (COUNT(?langnamevalue) AS ?count)
WHERE {
  ?langnameelem cmd-lang:Language.hasLanguageNameElementValue ?langnamevalue .
  # ?lang contains both the name element and the ISO 639 component.
  ?lang cmdm:contains ?langnameelem .
  ?lang cmdm:contains ?iso639 .
  ?iso639 cmdm:contains ?iso639code .
  ?iso639code a cmd-iso639:ISO639.iso-639-3-code .
  ?iso639code cmd-iso639:ISO639.hasiso-639-3-codeElementValue ?iso639codevalue .
}
GROUP BY ?langnamevalue ?iso639codevalue
ORDER BY DESC(?count)

result

Search by DatCat

Search in description fields for the strings "syntax" or "parse" (case-insensitive); get the field value and the resource identifier:

# Find literal values, on instances of classes whose dcr:datcat is the string
# "http://www.isocat.org/datcat/DC-2520", that contain "syntax" or "parse"
# (case-insensitive). Returns the matching value and a graph IRI.
PREFIX dcr: <http://www.isocat.org/ns/dcr.rdf#>

# Renames are written in the parenthesised (expr AS ?var) form required by
# SPARQL 1.1. The ?resourcetitle column name is kept unchanged for existing
# consumers of the result set.
SELECT (?g AS ?resGraph) (?v AS ?resourcetitle)
WHERE {
  # ?g is bound to the named graph holding the dcr:datcat statement.
  # NOTE(review): the instance patterns below are outside this GRAPH clause,
  # so ?g is not necessarily the graph of the matching resource - confirm
  # against the dataset's graph layout.
  GRAPH ?g { ?cmdelem dcr:datcat "http://www.isocat.org/datcat/DC-2520" . }
  ?instelem a ?cmdelem .
  ?instelem ?p ?v .

  FILTER isLiteral(?v)
  FILTER REGEX(?v, "(syntax|parse)", "i")
}

syntax or parse in description (by datcat: Description DC-2520)

Traverse cmdm:contains

List all elements starting from given root component, show the element type and the value:

# Starting from every resource typed as LexicalResourceProfile, follow
# cmdm:contains through any number of nested components and list each
# contained element's type together with its literal values.
#
# rdfs: is declared explicitly so the query does not rely on an endpoint's
# predefined namespace table; the unused oa: prefix has been dropped.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX cmdm: <http://www.clarin.eu/cmd/general.rdf#>

SELECT ?elemtype ?value
WHERE {
  ?rootcomponent a <http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1290431694579/rdf#LexicalResourceProfile> .
  # Zero-or-more path: ?comp is the root itself or any nested component.
  ?rootcomponent cmdm:contains* ?comp .
  ?comp cmdm:contains ?elem .
  ?elem a ?elemtype .
  # Any predicate is accepted; the FILTERs below keep only literal objects.
  ?elem ?haselemvalue ?value .
  # Restrict to types declared as subclasses of cmdm:Element.
  ?elemtype rdfs:subClassOf cmdm:Element .
  FILTER(isLiteral(?value))
  # '.' requires at least one matching character, which drops empty values.
  FILTER(regex(?value, '.'))
}

elements with values for LexicalResourceProfile

Last modified 10 years ago Last modified on 05/27/14 10:30:49