[124] | 1 | module namespace cmd-model = "http://spraakbanken.gu.se/clarin/xquery/model"; |
---|
| 2 | |
---|
| 3 | (: |
---|
| 4 | $Id: cmd-model.xqm 301 2010-03-29 13:32:22Z ljo $ |
---|
| 5 | :) |
---|
| 6 | |
---|
| 7 | import module namespace xdb="http://exist-db.org/xquery/xmldb"; |
---|
| 8 | import module namespace util="http://exist-db.org/xquery/util"; |
---|
| 9 | |
---|
(: Base XML:DB URI handed to xdb:login before storing documents. :)
declare variable $cmd-model:cmdiDatabaseURI as xs:string := "xmldb:exist:///db";

(: Collection paths. commonFreqsPath receives results computed over several collections. :)
declare variable $cmd-model:commonFreqsPath as xs:string := "/db/common/clarin/freqs";
declare variable $cmd-model:cmdiMirrorPath as xs:string := "/db/cmdi-mirror";

(: Names of the API operations. :)
declare variable $cmd-model:getCollections as xs:string := "getCollections";
declare variable $cmd-model:queryModel as xs:string := "queryModel";
declare variable $cmd-model:searchRetrieve as xs:string := "searchRetrieve";

(: Element paths for actor related data. :)
declare variable $cmd-model:typeActorPath as xs:string := "MDGroup/Actors/Actor";
declare variable $cmd-model:typeActorPath0 as xs:string := "Actor";
declare variable $cmd-model:typeActorRolePath as xs:string := "MDGroup/Actors/Actor/Role";

(: docTypeTerms is the name of the root element of stored result documents. :)
declare variable $cmd-model:docTypeTerms as xs:string := "Terms";
declare variable $cmd-model:docTypeSuffix as xs:string := "Values";

(: Response format identifiers understood by cmd-model:serialise-as. :)
declare variable $cmd-model:responseFormatXml as xs:string := "xml";
declare variable $cmd-model:responseFormatJSon as xs:string := "json";
declare variable $cmd-model:responseFormatText as xs:string := "text";

(: Name of the document holding the cached collection listing. :)
declare variable $cmd-model:collectionDocName as xs:string := "collection.xml";

(: File extension appended to generated document names. :)
declare variable $cmd-model:xmlExt as xs:string := ".xml";
---|
| 33 | |
---|
(:~
 Builds a Term element summarising the nodes matched by //$path in a collection.

 The element carries the number of matched nodes, the number of descendant text
 nodes and the number of distinct text values. While $depth is greater than 0
 the function recurses into every distinct child element name; otherwise the
 literal text 'maxdepth' is emitted as content.

 @param $collection collection URI the path is evaluated against
 @param $path relative element path, steps separated by "/"
 @param $depth remaining number of levels to descend
 @return a Term element, possibly containing nested Term elements
:)
declare function cmd-model:elem($collection as xs:string, $path as xs:string, $depth as xs:integer) as element() {
    (: NOTE(review): $collection and $path are concatenated into an expression
       that is evaluated as code; they must not come from untrusted input. :)
    let $path-nodes := util:eval(fn:concat("collection('", $collection, "')//", $path))
    let $path-count := count($path-nodes)

    let $subs := distinct-values($path-nodes/child::element()/name())
    let $text-nodes := $path-nodes//text()
    let $text-count := count($text-nodes)
    let $text-count-distinct := count(distinct-values($text-nodes))
    (: Last step of the path. A path without any "/" does not match the
       pattern, so fall back to the path itself instead of an empty name. :)
    let $last-step := text:groups($path, "/([^/]+)$")[last()]
    let $name := if (fn:empty($last-step)) then $path else $last-step
    return
        <Term path="{fn:concat("//", $path)}" name="{$name}" count="{$path-count}" count_text="{$text-count}" count_distinct_text="{$text-count-distinct}">{
            if ($depth > 0) then
                for $elname in $subs[. != '']
                return
                    cmd-model:elem($collection, concat($path, '/', $elname), $depth - 1)
            else 'maxdepth'
        }</Term>
};
---|
| 51 | |
---|
(:~
 Produces one Term element per node in $n. Each Term has the node's name as
 its name attribute and, as content, the XPath reported by util:node-xpath
 for the node's parent element (nothing when there is no parent element).

 @param $n the nodes to describe
 @return a sequence of Term elements
:)
declare function cmd-model:paths($n) {
    for $node in $n
    let $parent-xpaths :=
        for $parent in $node/parent::element()
        return util:node-xpath($parent)
    return <Term name="{$node/name()}"> {$parent-xpaths}</Term>
};
---|
| 59 | |
---|
(:~
 Creates and stores the model document for $collection and for each of its
 descendant collections.

 Descendant collections are processed before the collection itself; the
 returned sequence nevertheless lists the result for $collection first.

 @param $collection path of the collection to start from
 @param $type-name element path the model is built for
 @param $depth depth passed on unchanged to cmd-model:create-doc
 @return the values returned by cmd-model:create-doc for every visited collection
:)
declare function cmd-model:recurse-collections-model($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
    let $descendant-docs :=
        for $sub in xdb:get-child-collections($collection)
        return
            cmd-model:recurse-collections-model(fn:concat($collection, '/', xs:string($sub)), $type-name, $depth)
    let $own-doc := cmd-model:create-doc($collection, $type-name, $depth)
    return ($own-doc, $descendant-docs)
};
---|
| 76 | |
---|
(:~
 Lists $collection and all of its descendant collections.

 Every collection is reported as a Collection element whose name attribute is
 the last step of the collection path and whose content is the full path.
 Collections without children get the same name attribute as collections
 with children.

 NOTE(review): $depth is passed down unchanged and never tested, so the
 recursion is not limited by it. Confirm whether a limit was intended.

 @param $collection path of the collection to start from
 @param $depth currently unused, see the note above
 @return Collection elements, $collection first, then its descendants
:)
declare function cmd-model:recurse-collections($collection as xs:string, $depth as xs:integer) as item()* {
    let $child-results :=
        for $child in xdb:get-child-collections($collection)
        return
            cmd-model:recurse-collections(concat($collection, '/', xs:string($child)), $depth)
    let $current := <Collection name="{text:groups($collection, "/([^/]+)$")[last()]}">{$collection}</Collection>
    return ($current, $child-results)
};
---|
[124] | 93 | |
---|
(:~
 Computes the model for $type-name in $collection and stores it.

 The model is always recomputed; there is no check for whether newer data is
 available.

 @param $collection path of the collection to analyse and to store into
 @param $type-name element path the model is built for
 @param $depth maximum depth passed to cmd-model:elem
 @return the value returned by cmd-model:store-result
:)
declare function cmd-model:create-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as xs:string* {
    let $model := cmd-model:elem($collection, $type-name, $depth)
    return cmd-model:store-result($collection, $model, $type-name, $depth)
};
---|
| 102 | |
---|
(:~
 Returns the stored result document for $type-name and $depth in $collection,
 creating it first when it is not available yet.

 The document name is the last step of $type-name (or $type-name itself when
 it contains no "/"), followed by the depth and the XML extension.

 @param $collection path of the collection holding the result document
 @param $type-name element path the model is built for
 @param $depth depth that is part of the document name
 @return the result document
:)
declare function cmd-model:get-result-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
    let $last-step := text:groups($type-name, "/(\w+)$")[last()]
    let $base-name := if (fn:exists($last-step)) then $last-step else $type-name
    (: Reference relative to the collection, without the extension. :)
    let $doc-ref := fn:concat("/", $base-name, xs:string($depth))
    let $ensure-stored :=
        if (fn:not(cmd-model:is-result-available($collection, $doc-ref))) then
            cmd-model:create-doc($collection, $type-name, $depth)
        else
            ()
    return
        fn:doc(fn:concat($collection, $doc-ref, $cmd-model:xmlExt))
};
---|
| 116 | |
---|
(:~
 Generic document getter.

 @param $collection path of the collection
 @param $doc-name name of the document inside the collection
 @return the document found at $collection/$doc-name
:)
declare function cmd-model:get-doc($collection as xs:string, $doc-name as xs:string) as item()* {
    let $doc-uri := fn:concat($collection, "/", $doc-name)
    return fn:doc($doc-uri)
};
---|
| 123 | |
---|
| 124 | |
---|
(:~
 Tells whether a result document is already available.

 @param $collection path of the collection
 @param $result-ref document reference appended directly to $collection, so
        it has to start with "/"; the XML extension is added here
 @return true when the document exists
:)
declare function cmd-model:is-result-available($collection as xs:string, $result-ref as xs:string) as xs:boolean {
    let $doc-uri := fn:concat($collection, $result-ref, $cmd-model:xmlExt)
    return fn:doc-available($doc-uri)
};
---|
| 131 | |
---|
(:~
 Tells whether a document is available.

 @param $collection path of the collection
 @param $doc-name name of the document, including its extension
 @return true when $collection/$doc-name exists
:)
declare function cmd-model:is-doc-available($collection as xs:string, $doc-name as xs:string) as xs:boolean {
    let $doc-uri := fn:concat($collection, "/", $doc-name)
    return fn:doc-available($doc-uri)
};
---|
| 138 | |
---|
(:~
 Stores the calculated frequencies for reuse.

 With a single collection the document is stored in that collection. With
 more than one collection it is stored in the common collection
 ($cmd-model:commonFreqsPath) under a compound name.

 Before storing, the function logs in with the write user and credential read
 from /db/clarin/writer.xml.

 NOTE(review): the single-collection document name is built from the whole
 $type-name, whereas cmd-model:get-result-doc looks the document up by the
 last path step only. Confirm the two agree for type names containing "/".

 @param $coll-names one or more collection paths
 @param $entries the elements to store inside the document element
 @param $type-name element path the entries were computed for
 @param $depth depth the entries were computed with
 @return the value returned by xdb:store
:)
declare function cmd-model:store-result($coll-names as xs:string+, $entries as element()*, $type-name as xs:string, $depth as xs:integer) as xs:string {
    let $clarin-writer := fn:doc("/db/clarin/writer.xml"),
        $dummy := xdb:login($cmd-model:cmdiDatabaseURI, $clarin-writer//write-user/text(), $clarin-writer//write-user-cred/text()),
        (: The naming and element helpers declare their depth parameter as
           xs:string, so convert once and use the string in both branches. :)
        $depth-string := xs:string($depth)
    return
        if (fn:exists($coll-names[2])) then
            (: More than one collection is involved. :)
            xdb:store($cmd-model:commonFreqsPath, cmd-model:make-compound-doc-name($coll-names, $type-name, $depth-string), cmd-model:make-doc-element-of-type($type-name, $coll-names, $entries, $depth-string))
        else
            (: Only a single collection is involved. :)
            let $dummy := util:log('debug', fn:concat('Stores ', $type-name, ' in ', $coll-names))
            return xdb:store($coll-names, cmd-model:make-doc-name($coll-names, $type-name, $depth-string, fn:false()), cmd-model:make-doc-element-of-type($type-name, (), $entries, $depth-string))
};
---|
| 157 | |
---|
(:~
 Stores the collection listing in the given collection.

 Logs in with the write user and credential read from /db/clarin/writer.xml
 and stores $data under the name $cmd-model:collectionDocName. Errors raised
 by the store operation are not caught.

 @param $data the node to store
 @param $collection-path path of the collection to store into
 @return the value returned by xdb:store
:)
declare function cmd-model:store-collection-data($data as node(), $collection-path as xs:string) as xs:string? {
    let $writer-config := fn:doc("/db/clarin/writer.xml")
    let $user := $writer-config//write-user/text()
    let $credential := $writer-config//write-user-cred/text()
    let $logged-in := xdb:login($cmd-model:cmdiDatabaseURI, $user, $credential)
    return
        xdb:store($collection-path, $cmd-model:collectionDocName, $data)
};
---|
| 167 | |
---|
(:~
 Creates the document name for a type, with or without the collection path.

 @param $coll-name collection path, only used when $incl-path is true
 @param $type-name type name forming the first part of the document name
 @param $depth depth forming the second part of the document name
 @param $incl-path whether to prefix the name with $coll-name and "/"
 @return $type-name, $depth and the XML extension, optionally prefixed
:)
declare function cmd-model:make-doc-name($coll-name as xs:string?, $type-name as xs:string, $depth as xs:string, $incl-path as xs:boolean) as xs:string {
    let $file-name := fn:concat($type-name, $depth, $cmd-model:xmlExt)
    return
        if (fn:not($incl-path)) then
            $file-name
        else
            fn:concat($coll-name, "/", $file-name)
};
---|
| 179 | |
---|
(:~
 Creates a document name for a selection of collections, for reuse.

 The name consists of $type-name, $depth, a "-", the MD5 hash of the
 collection names joined without a separator, and the XML extension.

 @param $coll-names the selected collection paths
 @param $type-name type name forming the first part of the document name
 @param $depth depth forming the second part of the document name
 @return the compound document name
:)
declare function cmd-model:make-compound-doc-name($coll-names as xs:string+, $type-name as xs:string, $depth as xs:string) as xs:string {
    let $digest := util:hash(string-join($coll-names, ""), "MD5")
    return
        fn:concat($type-name, $depth, "-", $digest, $cmd-model:xmlExt)
};
---|
| 188 | |
---|
(:~
 Creates an element of the given type.

 @param $type-name name of the element to create
 @param $count value of the count attribute
 @param $text-count value of the text-count attribute
 @param $text-types-count value of the text-types-count attribute
 @param $value text content of the element
 @return the constructed element
:)
declare function cmd-model:make-element-of-type($type-name as xs:string, $count as xs:string, $text-count as xs:string, $text-types-count as xs:string, $value as xs:string) as element() {
    element {$type-name} {
        (: Each attribute takes its value from the parameter of the same name. :)
        attribute count {$count},
        attribute text-count {$text-count},
        attribute text-types-count {$text-types-count},
        text {$value}
    }
};
---|
| 201 | |
---|
(:~
 Creates a document element of the given type.

 The element name comes from cmd-model:get-doc-type-element-name. It carries
 a depth attribute, a colls attribute with the comma separated collection
 names (only when $coll-names is not empty) and a created attribute holding
 the current date and time, followed by $entries as content.

 @param $type-name type name handed to cmd-model:get-doc-type-element-name
 @param $coll-names collection names to record, may be empty
 @param $entries the elements to wrap
 @param $depth value of the depth attribute
 @return the constructed document element
:)
declare function cmd-model:make-doc-element-of-type($type-name as xs:string, $coll-names as xs:string*, $entries as element()*, $depth as xs:string) as element() {
    element {cmd-model:get-doc-type-element-name($type-name)} {
        attribute depth {$depth},
        if (fn:exists($coll-names)) then
            attribute colls {fn:string-join($coll-names, ",")}
        else
            (),
        attribute created {fn:current-dateTime()},
        $entries
    }
};
---|
| 216 | |
---|
(:~
 Gives the element name of the document for a type.

 The same name, $cmd-model:docTypeTerms, is returned for every type;
 $type-name is not inspected.

 @param $type-name the type name
 @return the document element name
:)
declare function cmd-model:get-doc-type-element-name($type-name as xs:string) as xs:string {
    let $element-name := $cmd-model:docTypeTerms
    return $element-name
};
---|
[256] | 223 | |
---|
[298] | 224 | |
---|
| 225 | |
---|
| 226 | |
---|
(:~
 Serialisation format.

 For the JSON format the exist:serialize option is switched to text output
 with the application/json media type; the item itself is returned as is
 because no conversion to JSON is performed. For every other format the item
 is returned unchanged.

 @param $item the node to return
 @param $format one of the response format identifiers
 @return $item
:)
declare function cmd-model:serialise-as($item as node()?, $format as xs:string) as item()? {
    if ($format ne $cmd-model:responseFormatJSon) then
        (: $cmd-model:responseFormatXml, $cmd-model:responseFormatText :)
        $item
    else
        let $json-option := util:declare-option("exist:serialize", "method=text media-type=application/json")
        return
            $item
};
---|
[258] | 238 | |
---|
| 239 | |
---|
(:~
 API function queryModel.

 Fetches (creating it when needed) the result document for $cmd-index-path
 and returns it in the requested format.

 @param $cmd-index-path element path the model is requested for
 @param $collection collection to query
 @param $format response format identifier
 @param $max-depth depth of the model
 @return the serialised result document
:)
declare function cmd-model:query-model($cmd-index-path as xs:string, $collection as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
    let $model-doc := cmd-model:get-result-doc($collection, $cmd-index-path, $max-depth)
    return
        cmd-model:serialise-as($model-doc, $format)
};
---|
| 246 | |
---|
(:~
 API function getCollections.

 Returns the stored collection listing in the requested format. When the
 listing document is not available yet, it is built with
 cmd-model:recurse-collections and stored first.

 @param $collections collection path(s) to list
 @param $format response format identifier
 @param $max-depth depth passed to cmd-model:recurse-collections
 @return the serialised collection listing
:)
declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
    (: fixme! - collections, is-doc-available only takes one collection. :)
    let $listing-stored :=
        if (fn:not(cmd-model:is-doc-available($collections, $cmd-model:collectionDocName))) then
            let $found :=
                for $root in $collections
                return
                    cmd-model:recurse-collections($root, $max-depth)
            return
                cmd-model:store-collection-data(<Collections count="{count($found)}" root="{$collections}">{$found}</Collections>, $collections)
        else
            ()
    return
        cmd-model:serialise-as(cmd-model:get-doc($collections, $cmd-model:collectionDocName), $format)
};
---|
[286] | 263 | |
---|
(:~
 API function searchRetrieve.

 Evaluates $cql-query (an XPath fragment) against $collection, selects the
 CMD ancestors of the matches and returns the items from position
 $start-item to position $end-item, both inclusive, wrapped in a
 searchRetrieveResponse element.

 @param $cql-query the query, appended to the collection() call
 @param $collection collection to search
 @param $format response format identifier
 @param $start-item position of the first item to return, starting at 1
 @param $end-item position of the last item to return
 @return the serialised searchRetrieveResponse
:)
declare function cmd-model:search-retrieve($cql-query as xs:string, $collection as xs:string+, $format as xs:string, $start-item as xs:integer, $end-item as xs:integer) as item()* {
    (: NOTE(review): the query and collection are concatenated into an
       expression that is evaluated as code. If they come straight from the
       request they must be validated before reaching this point. :)
    let $results := util:eval(fn:concat("collection('", xdb:decode($collection), "')", xdb:decode($cql-query), "/ancestor::CMD")),
        $result-count := fn:count($results),
        (: The third argument of fn:subsequence is a length, not an end
           position, so derive it from the inclusive start and end positions. :)
        $result-seq := fn:subsequence($results, $start-item, $end-item - $start-item + 1),
        $seq-count := fn:count($result-seq),
        $result-fragment :=
            <searchRetrieveResponse>
                <numberOfRecords>{$result-count}</numberOfRecords>
                <echoedSearchRetrieveRequest>{$cql-query, $collection, $start-item, $end-item}</echoedSearchRetrieveRequest>
                <diagnostics>{$seq-count}</diagnostics>
                <records>
                    {$result-seq}
                </records>
            </searchRetrieveResponse>

    return
        cmd-model:serialise-as($result-fragment, $format)

};
---|
[292] | 286 | (: |
---|
| 287 | {cmdComponent} //{cmdComponent} Actor //Actor |
---|
| 288 | {cmdPath}. //{cmdPath}/{cmdComponent} Actor.Contact.Phone //Actor/Contact/Phone |
---|
| 289 | {cmdIndex} {rel} {term} //{cmdIndex}[\. {rel} '{term}'] Actors.Actor.Sex=f //Actors/Actor/Sex[.='f'] |
---|
| 290 | {cmdIndex} any {term} //{cmdIndex}[contains(.,'{term}')] Organisation.Name any University //Organisation/Name[contains(.,'University')] |
---|
| 291 | and, or, and not ?! Organisation.Name any University and Actor.gender=m ?! |
---|
[286] | 292 | |
---|
[292] | 293 | //MDGroup[Actors/Actor/Role[.='sponsor'] and Actors/Actor/Name[contains(.,'a')]] |
---|
| 294 | //Title[starts-with(.,'a')] |
---|
| 295 | //Title[starts-with(.,'A')] |
---|
| 296 | //Title[contains(.,'analysis')] |
---|
| 297 | http://demo.spraakdata.gu.se/clarin/cmd/model/stats?operation=searchRetrieve&query=//Title[contains(.,'analysis')]&collection= |
---|
| 298 | |
---|
| 299 | <record> |
---|
| 300 | <recordSchema>info:srw/schema/1/dc-v1.1</recordSchema> |
---|
| 301 | <recordPacking>xml</recordPacking> |
---|
| 302 | <recordData> |
---|
| 303 | <srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-v1.1"> |
---|
| 304 | <dc:title>This is a Sample Record</dc:title> |
---|
| 305 | </srw_dc:dc> |
---|
| 306 | </recordData> |
---|
| 307 | <recordPosition>1</recordPosition> |
---|
| 308 | <extraRecordData> |
---|
| 309 | <rel:score xmlns:rel="info:srw/extensions/2/rel-1.0"> |
---|
| 310 | 0.965 |
---|
| 311 | </rel:score> |
---|
| 312 | </extraRecordData> |
---|
| 313 | </record> |
---|
| 314 | |
---|
| 315 | <searchRetrieveResponse> |
---|
| 316 | <numberOfRecords>integer</numberOfRecords> |
---|
| 317 | <echoedSearchRetrieveRequest>query itself (together with the context-collection) </echoedSearchRetrieveRequest> |
---|
| 318 | <diagnostics>if necessary</diagnostics> |
---|
| 319 | <records> |
---|
| 320 | .... |
---|
| 321 | </records> |
---|
| 322 | </searchRetrieveResponse> |
---|
| 323 | |
---|
| 324 | :) |
---|