module namespace cmd-model = "http://spraakbanken.gu.se/clarin/xquery/model";
(: $Id: cmd-model.xqm 301 2010-03-29 13:32:22Z ljo $ :)

(:
  Model statistics, collection listing and search functions for the CMDI
  mirror stored in eXist-db.

  NOTE(review): the reviewed copy of this module had lost every direct XML
  element constructor (only the enclosed "{ ... }" expressions were left).
  They are rebuilt below as computed element constructors. The element and
  attribute names used for them are reconstructions and must be confirmed
  against the consumers of this API before deployment.

  NOTE(review): text:groups is used without an explicit import of the eXist
  text module, exactly as before; this relies on the module being enabled as
  a built-in module in the eXist configuration - confirm.
:)

import module namespace xdb="http://exist-db.org/xquery/xmldb";
import module namespace util="http://exist-db.org/xquery/util";

declare variable $cmd-model:cmdiDatabaseURI as xs:string := "xmldb:exist:///db";
declare variable $cmd-model:commonFreqsPath as xs:string := "/db/common/clarin/freqs";
declare variable $cmd-model:cmdiMirrorPath as xs:string := "/db/cmdi-mirror";

(: Operation names. :)
declare variable $cmd-model:getCollections as xs:string := "getCollections";
declare variable $cmd-model:queryModel as xs:string := "queryModel";
declare variable $cmd-model:searchRetrieve as xs:string := "searchRetrieve";

declare variable $cmd-model:typeActorPath as xs:string := "MDGroup/Actors/Actor";
declare variable $cmd-model:typeActorPath0 as xs:string := "Actor";
declare variable $cmd-model:typeActorRolePath as xs:string := "MDGroup/Actors/Actor/Role";

declare variable $cmd-model:docTypeTerms as xs:string := "Terms";
declare variable $cmd-model:docTypeSuffix as xs:string := "Values";

(: Response formats. :)
declare variable $cmd-model:responseFormatXml as xs:string := "xml";
declare variable $cmd-model:responseFormatJSon as xs:string := "json";
declare variable $cmd-model:responseFormatText as xs:string := "text";

declare variable $cmd-model:collectionDocName as xs:string := "collection.xml";
declare variable $cmd-model:xmlExt as xs:string := ".xml";

(:~
  Builds the statistics element for one path in a collection and, while
  $depth is greater than zero, recursively for each distinct child element
  name found below that path.

  SECURITY: $collection and $path are concatenated into an expression that is
  handed to util:eval. Callers must not pass unvalidated request input.

  NOTE(review): the wrapping element and its "path" attribute are
  reconstructed (the original markup was lost). The three count attribute
  names follow cmd-model:make-element-of-type.

  @param $collection database collection to inspect
  @param $path relative element path, evaluated as //$path
  @param $depth remaining recursion depth
  @return one element carrying the counts; its content is either the child
          statistics or the string 'maxdepth' once $depth is exhausted
:)
declare function cmd-model:elem($collection as xs:string, $path as xs:string, $depth as xs:integer) as element() {
  let $path-nodes := util:eval(fn:concat("collection('", $collection, "')//", $path))
  let $path-count := fn:count($path-nodes)
  let $subs := fn:distinct-values($path-nodes/child::element()/name())
  let $text-nodes := $path-nodes//text()
  let $text-count := fn:count($text-nodes)
  let $text-count-distinct := fn:count(fn:distinct-values($text-nodes))
  return
    element Term {
      attribute path {$path},
      attribute count {$path-count},
      attribute text-count {$text-count},
      attribute text-types-count {$text-count-distinct},
      if ($depth > 0) then
        for $elname in $subs[. != '']
        return cmd-model:elem($collection, fn:concat($path, '/', $elname), $depth - 1)
      else
        'maxdepth'
    }
};

(:~
  For every node in $n, emits one element containing the XPath (as reported
  by util:node-xpath) of the node's parent element.

  NOTE(review): the wrapping element name is reconstructed - confirm.
:)
declare function cmd-model:paths($n) {
  for $el in $n
  return
    element Term {
      for $anc in $el/parent::element()
      return util:node-xpath($anc)
    }
};

(:~
  Creates and stores the model document for $collection and for every
  collection below it.

  $depth is passed on unchanged to every level; it limits the element depth
  of each model document, not the collection recursion.

  @return the results of cmd-model:create-doc for this collection followed by
          those of all descendant collections
:)
declare function cmd-model:recurse-collections-model($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
  let $children := xdb:get-child-collections($collection)
  return
    if (fn:exists($children)) then
      let $child-results :=
            for $child in $children
            return cmd-model:recurse-collections-model(fn:concat($collection, '/', xs:string($child)), $type-name, $depth),
          $current := cmd-model:create-doc($collection, $type-name, $depth)
      return ($current, $child-results)
    else
      cmd-model:create-doc($collection, $type-name, $depth)
};

(:~
  Lists $collection and all collections below it, one element per collection
  path, parents before children.

  $depth is accepted but does not limit the recursion (it is passed on
  unchanged and never tested).

  NOTE(review): the element name "collection" is reconstructed - confirm.
:)
declare function cmd-model:recurse-collections($collection as xs:string, $depth as xs:integer) as item()* {
  let $children := xdb:get-child-collections($collection)
  return
    if (fn:exists($children)) then
      let $child-results :=
            for $child in $children
            return cmd-model:recurse-collections(fn:concat($collection, '/', xs:string($child)), $depth),
          $current := element collection {$collection}
      return ($current, $child-results)
    else
      element collection {$collection}
};

(:~
  Computes the model for $type-name in $collection and stores it.
  There is currently no check whether newer data is available; the document
  is always recomputed.

  @return the value returned by cmd-model:store-result
:)
declare function cmd-model:create-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as xs:string* {
  cmd-model:store-result($collection, cmd-model:elem($collection, $type-name, $depth), $type-name, $depth)
};

(:~
  Returns the last path segment of $type-name when the path ends in
  "/" followed by word characters, otherwise $type-name unchanged.
  Result documents are named after this value.
:)
declare function cmd-model:local-type-name($type-name as xs:string) as xs:string {
  let $name-last := text:groups($type-name, "/(\w+)$")[last()]
  return
    if (fn:empty($name-last)) then $type-name else $name-last
};

(:~
  Returns the stored model document for $type-name at $depth, creating it
  first when it does not exist yet.
:)
declare function cmd-model:get-result-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
  let $new-name := cmd-model:local-type-name($type-name),
      $dummy :=
        if (cmd-model:is-result-available($collection, fn:concat("/", $new-name, xs:string($depth)))) then
          ()
        else
          cmd-model:create-doc($collection, $type-name, $depth)
  return
    fn:doc(fn:concat($collection, "/", $new-name, xs:string($depth), $cmd-model:xmlExt))
};

(:~ Generic get-doc(collection, docname). :)
declare function cmd-model:get-doc($collection as xs:string, $doc-name as xs:string) as item()* {
  fn:doc(fn:concat($collection, "/", $doc-name))
};

(:~
  Tells whether the result is already available.
  $result-ref must start with "/" and must not include the ".xml" extension.
:)
declare function cmd-model:is-result-available($collection as xs:string, $result-ref as xs:string) as xs:boolean {
  fn:doc-available(fn:concat($collection, $result-ref, $cmd-model:xmlExt))
};

(:~ Tells whether the document $doc-name is available in $collection. :)
declare function cmd-model:is-doc-available($collection as xs:string, $doc-name as xs:string) as xs:boolean {
  fn:doc-available(fn:concat($collection, "/", $doc-name))
};

(:~
  Logs in to the database with the write account read from
  /db/clarin/writer.xml.

  @return the result of xdb:login
:)
declare function cmd-model:login-as-writer() as xs:boolean {
  let $clarin-writer := fn:doc("/db/clarin/writer.xml")
  return
    xdb:login($cmd-model:cmdiDatabaseURI, $clarin-writer//write-user/text(), $clarin-writer//write-user-cred/text())
};

(:~
  Stores the calculated frequencies for reuse. If more than one collection is
  given, the result is stored in the common collection.

  The document is named after the last segment of $type-name so that
  cmd-model:get-result-doc finds it again, and $depth is converted to a
  string once because the naming and element helpers declare it as xs:string.

  @return the value returned by xdb:store
:)
declare function cmd-model:store-result($coll-names as xs:string+, $entries as element()*, $type-name as xs:string, $depth as xs:integer) as xs:string {
  let $dummy-login := cmd-model:login-as-writer(),
      $doc-type := cmd-model:local-type-name($type-name),
      $depth-str := xs:string($depth)
  return
    if (fn:exists($coll-names[2])) then
      (: More than one collection. :)
      xdb:store($cmd-model:commonFreqsPath,
                cmd-model:make-compound-doc-name($coll-names, $doc-type, $depth-str),
                cmd-model:make-doc-element-of-type($type-name, $coll-names, $entries, $depth-str))
    else
      (: Exactly one collection. :)
      let $dummy := util:log('debug', fn:concat('Stores ', $type-name, ' in ', $coll-names))
      return
        xdb:store($coll-names,
                  cmd-model:make-doc-name($coll-names, $doc-type, $depth-str, fn:false()),
                  cmd-model:make-doc-element-of-type($type-name, (), $entries, $depth-str))
};

(:~ Stores the collection listing in the given collection. :)
declare function cmd-model:store-collection-data($data as node(), $collection-path as xs:string) as xs:string? {
  let $dummy-login := cmd-model:login-as-writer()
  return
    (: util:catch("org.exist.xquery.XPathException", :)
    xdb:store($collection-path, $cmd-model:collectionDocName, $data)
    (: , ()) :)
};

(:~ Creates the document name for a type, with or without the collection path. :)
declare function cmd-model:make-doc-name($coll-name as xs:string?, $type-name as xs:string, $depth as xs:string, $incl-path as xs:boolean) as xs:string {
  let $doc-name := fn:concat($type-name, $depth, $cmd-model:xmlExt)
  return
    if ($incl-path) then fn:concat($coll-name, "/", $doc-name) else $doc-name
};

(:~
  Creates a document name containing an MD5 hash of the selected collection
  names (concatenated without separator), for reuse.
:)
declare function cmd-model:make-compound-doc-name($coll-names as xs:string+, $type-name as xs:string, $depth as xs:string) as xs:string {
  let $name-prefix := fn:concat($type-name, $depth)
  return
    fn:concat($name-prefix, "-", util:hash(fn:string-join($coll-names, ""), "MD5"), $cmd-model:xmlExt)
};

(:~
  Creates an element of the given type.

  @param $type-name name of the element to create
  @param $count value of the count attribute
  @param $text-count value of the text-count attribute
  @param $text-types-count value of the text-types-count attribute
  @param $value text content of the element
:)
declare function cmd-model:make-element-of-type($type-name as xs:string, $count as xs:string, $text-count as xs:string, $text-types-count as xs:string, $value as xs:string) as element() {
  element {$type-name} {
    attribute count {$count},
    attribute text-count {$text-count},
    attribute text-types-count {$text-types-count},
    text {$value}
  }
};

(:~
  Creates the document element for the given type. It carries the depth, the
  comma-separated collection names (only when some are given) and the
  creation time, and wraps $entries.
:)
declare function cmd-model:make-doc-element-of-type($type-name as xs:string, $coll-names as xs:string*, $entries as element()*, $depth as xs:string) as element() {
  let $depth-value := attribute depth {$depth},
      $coll-names-value :=
        if (fn:empty($coll-names)) then () else attribute colls {fn:string-join($coll-names, ",")}
  return
    element {cmd-model:get-doc-type-element-name($type-name)} {
      $depth-value,
      $coll-names-value,
      attribute created {fn:current-dateTime()},
      $entries
    }
};

(:~
  Returns the element name for the document of a type.
  Currently the same name is used for every type; $type-name is ignored.
:)
declare function cmd-model:get-doc-type-element-name($type-name as xs:string) as xs:string {
  $cmd-model:docTypeTerms
};

(:~
  Serialisation format.
  For "json" only the serialisation option is set; the conversion itself is
  still disabled, so the item is returned unchanged for every format.
:)
declare function cmd-model:serialise-as($item as node()?, $format as xs:string) as item()? {
  if ($format eq $cmd-model:responseFormatJSon) then
    let $option := util:declare-option("exist:serialize", "method=text media-type=application/json")
    return
      (: json:xml-to-json($item) :)
      $item
  else
    (: $cmd-model:responseFormatXml, $cmd-model:responseFormatText :)
    $item
};

(:~
  API function queryModel.

  FIXME: $collection is declared xs:string+ but cmd-model:get-result-doc
  accepts a single collection, so more than one collection is a type error.
:)
declare function cmd-model:query-model($cmd-index-path as xs:string, $collection as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
  cmd-model:serialise-as(cmd-model:get-result-doc($collection, $cmd-index-path, $max-depth), $format)
};

(:~
  API function getCollections.

  FIXME: $collections is declared xs:string+ but cmd-model:is-doc-available,
  cmd-model:store-collection-data and cmd-model:get-doc each accept a single
  collection, so more than one collection is a type error.

  NOTE(review): the root element name "collections" is reconstructed - confirm.
:)
declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
  let $dummy :=
        if (cmd-model:is-doc-available($collections, $cmd-model:collectionDocName)) then
          ()
        else
          let $children :=
                for $collection-item in $collections
                return cmd-model:recurse-collections($collection-item, $max-depth)
          return cmd-model:store-collection-data(element collections {$children}, $collections)
  return
    cmd-model:serialise-as(cmd-model:get-doc($collections, $cmd-model:collectionDocName), $format)
};

(:~
  API function searchRetrieve.

  Evaluates the (URL-encoded) XPath query against every given collection and
  returns the CMD records with positions $start-item to $end-item, inclusive,
  together with the total and returned record counts.

  SECURITY: the decoded query and collection names are concatenated into an
  expression that is handed to util:eval. This executes arbitrary XQuery
  supplied by the client and needs validation before public exposure.

  NOTE(review): all element names of the response are reconstructed - confirm.
:)
declare function cmd-model:search-retrieve($cql-query as xs:string, $collection as xs:string+, $format as xs:string, $start-item as xs:integer, $end-item as xs:integer) as item()* {
  let $decoded-query := xdb:decode($cql-query)
  (: Decode and query each collection separately; xdb:decode takes one string. :)
  let $results :=
        for $coll in $collection
        return util:eval(fn:concat("collection('", xdb:decode($coll), "')", $decoded-query, "/ancestor::CMD"))
  let $result-count := fn:count($results)
  (: The third argument of fn:subsequence is a length, not an end position. :)
  let $result-seq := fn:subsequence($results, $start-item, $end-item - $start-item + 1)
  let $seq-count := fn:count($result-seq)
  let $result-fragment :=
        element searchRetrieveResponse {
          element numberOfRecords {$result-count},
          element echoedSearchRetrieveRequest {$cql-query, $collection, $start-item, $end-item},
          element returnedRecords {$seq-count},
          element records {$result-seq}
        }
  return
    cmd-model:serialise-as($result-fragment, $format)
};

(:
  Notes on the intended query-to-XPath mapping (not implemented in this module):

    query form                          XPath
    ----------------------------------  ---------------------------------------------
    {cmdComponent}                      //{cmdComponent}
      Actor                             //Actor
    {cmdPath}.{cmdComponent}            //{cmdPath}/{cmdComponent}
      Actor.Contact.Phone               //Actor/Contact/Phone
    {cmdIndex} {rel} {term}             //{cmdIndex}[. {rel} '{term}']
      Actors.Actor.Sex=f                //Actors/Actor/Sex[.='f']
    {cmdIndex} any {term}               //{cmdIndex}[contains(., '{term}')]
      Organisation.Name any University  //Organisation/Name[contains(.,'University')]

  Open question - boolean operators (and, or, and not):
    Organisation.Name any University and Actor.gender=m   ?!
    //MDGroup[Actors/Actor/Role[.='sponsor'] and Actors/Actor/Name[contains(.,'a')]]

  Example queries:
    //Title[starts-with(.,'a')]
    //Title[starts-with(.,'A')]
    //Title[contains(.,'analysis')]
    http://demo.spraakdata.gu.se/clarin/cmd/model/stats?operation=searchRetrieve&query=//Title[contains(.,'analysis')]&collection=

  Sample searchRetrieve response record (the markup of this sample was lost;
  only its text values remain):
    info:srw/schema/1/dc-v1.1  xml  This is a Sample Record  1  0.965  integer
    query itself (together with the context-collection) if necessary ....
:)