1 | module namespace cmd-model = "http://spraakbanken.gu.se/clarin/xquery/model"; |
---|
2 | |
---|
3 | (: |
---|
4 | $Id: cmd-model.xqm 726 2010-09-28 11:31:03Z vronk $ |
---|
5 | :) |
---|
6 | |
---|
7 | import module namespace xdb="http://exist-db.org/xquery/xmldb"; |
---|
8 | import module namespace util="http://exist-db.org/xquery/util"; |
---|
9 | |
---|
(: Database URI handed to xdb:login before results are written. :)
declare variable $cmd-model:cmdiDatabaseURI as xs:string := "xmldb:exist:///db";

(: Collection holding the cached result documents, and collection holding the CMDI records. :)
declare variable $cmd-model:commonFreqsPath as xs:string := "/db/common/clarin/freqs";
declare variable $cmd-model:cmdiMirrorPath  as xs:string := "/db/cmdi-mirror";

(: Operation names; not referenced inside this module - presumably used by the calling script, verify. :)
declare variable $cmd-model:getCollections as xs:string := "getCollections";
declare variable $cmd-model:queryModel     as xs:string := "queryModel";
declare variable $cmd-model:searchRetrieve as xs:string := "searchRetrieve";

(: Predefined element paths; not referenced inside this module. :)
declare variable $cmd-model:typeActorPath     as xs:string := "MDGroup/Actors/Actor";
declare variable $cmd-model:typeActorPath0    as xs:string := "Actor";
declare variable $cmd-model:typeActorRolePath as xs:string := "MDGroup/Actors/Actor/Role";

(: docTypeTerms is the root element name of stored result documents. :)
declare variable $cmd-model:docTypeTerms  as xs:string := "Terms";
declare variable $cmd-model:docTypeSuffix as xs:string := "Values";

(: Response format identifiers accepted by cmd-model:serialise-as. :)
declare variable $cmd-model:responseFormatXml  as xs:string := "xml";
declare variable $cmd-model:responseFormatJSon as xs:string := "json";
declare variable $cmd-model:responseFormatText as xs:string := "text";

declare variable $cmd-model:collectionDocName as xs:string := "collection.xml";

(: Pseudo collection name meaning "the whole mirror". :)
declare variable $cmd-model:collectionRoot as xs:string := "root";

(: Extension appended to generated document names. :)
declare variable $cmd-model:xmlExt as xs:string := ".xml";

(: Maximum number of entries returned by cmd-model:values; 0 disables the limit. :)
declare variable $cmd-model:valuesLimit as xs:integer := 100;
---|
37 | |
---|
38 | |
---|
39 | |
---|
(:
  Build the Term summary for $path within the given collections.
  Delegates to cmd-model:elem-r, using $depth both as the fixed maximum
  depth and as the initial remaining depth.
:)
declare function cmd-model:elem($collections as xs:string+, $path as xs:string, $depth as xs:integer) as element() {
    (: fixme! - handle multiple collections :)
    let $max-depth := $depth
    return
        cmd-model:elem-r($collections, $path, $max-depth, $depth)
};
---|
44 | |
---|
(:
  Recursive worker behind cmd-model:elem.

  Evaluates $path against the CMDI mirror and returns a Term element with
  the number of matching nodes, the number of text nodes below them and the
  number of distinct text values. While $depth is greater than 0 one nested
  Term is produced per distinct child element name; when $max-depth is 1 the
  value list from cmd-model:values is appended as well. At depth 0 the
  content is the literal text 'maxdepth'.

  $collections  collection handles, or the root marker as first item
  $path         relative path, without leading slashes
  $max-depth    depth originally requested (constant during the recursion)
  $depth        remaining depth

  SECURITY: $path and the collection handles are concatenated into a query
  string that is run through util:eval. Callers must not pass untrusted
  input here.
:)
declare function cmd-model:elem-r($collections as xs:string+, $path as xs:string, $max-depth as xs:integer, $depth as xs:integer) as element() {
    (: $collection is referenced by name inside the evaluated query strings, so it must keep this name. :)
    let $collection := collection($cmd-model:cmdiMirrorPath),
        $path-nodes :=
            if ($collections[1] eq $cmd-model:collectionRoot) then
                util:eval(fn:concat("$collection//", $path))
            else
                (: NOTE(review): this expression ends in /ancestor::CMD, so for non-root
                   collections the statistics below are computed on CMD elements rather
                   than on the nodes at $path - confirm this is intended. :)
                for $coll in $collections
                return util:eval(fn:concat("$collection//ft:query(descendant::IsPartOf, <query><term>", xdb:decode($coll) ,"</term></query>)//", $path, "/ancestor::CMD"))

    let $path-count := count($path-nodes)
    let $subs := distinct-values($path-nodes/child::element()/name())
    let $text-nodes := $path-nodes//text()
    let $text-count := count($text-nodes)
    let $text-count-distinct := count(distinct-values($text-nodes))
    return
        (: name is the last path segment; fn:tokenize also covers a path without any slash :)
        <Term path="{fn:concat("//", $path)}" name="{fn:tokenize($path, "/")[last()]}" count="{$path-count}" count_text="{$text-count}" count_distinct_text="{$text-count-distinct}">{
            if ($depth > 0) then
                (for $elname in $subs[. != '']
                 return
                    (: recurse with the collection handles, not with the document nodes held in $collection :)
                    cmd-model:elem-r($collections, concat($path, '/', $elname), $max-depth, $depth - 1),
                 if ($max-depth eq 1) then cmd-model:values($path-nodes) else ())
            else 'maxdepth'
        }</Term>
};
---|
69 | |
---|
(:
  Frequency list of the text values of $nodes.
  One v element is produced per distinct text child value, carrying the
  value in @key and in @cnt the number of nodes whose string value equals it.
  Entries are ordered case-insensitively and cut off after
  $cmd-model:valuesLimit entries, unless that limit is 0.
:)
declare function cmd-model:values($nodes as node()*) as node()* {
    let $entries :=
        for $value in distinct-values($nodes/text())
        let $occurrences := count($nodes[. eq $value])
        order by lower-case($value) ascending
        return
            element v {
                attribute key { $value },
                attribute cnt { $occurrences }
            }
    return
        $entries[$cmd-model:valuesLimit eq 0 or position() le $cmd-model:valuesLimit]
};
---|
81 | |
---|
(:
  For every node in $n return a Term element named after the node, whose
  content is the util:node-xpath of the node's parent element, if it has one.
:)
declare function cmd-model:paths($n) {
    for $el in $n
    return
        element Term {
            attribute name { $el/name() },
            for $parent in $el/parent::*
            return util:node-xpath($parent)
        }
};
---|
89 | |
---|
(:
  Walk the database collection tree below $collection and call
  cmd-model:create-doc for every collection found, including $collection
  itself. Returns the create-doc result for $collection followed by the
  results for its descendants.
:)
declare function cmd-model:recurse-collections-model($collection as xs:string, $type-name as xs:string, $depth as xs:integer, $name as xs:string) as item()* {
    (: descendants are processed before the current collection, as create-doc stores a document :)
    let $descendant-results :=
        for $child in xdb:get-child-collections($collection)
        let $child-path := fn:concat($collection, '/', xs:string($child))
        return
            cmd-model:recurse-collections-model($child-path, $type-name, $depth, $name)
    let $own-result := cmd-model:create-doc($collection, $type-name, $depth, $name)
    return
        ($own-result, $descendant-results)
};
---|
106 | |
---|
(:
  Recurse for collections.

  Builds a c element for the metadata record(s) in $collection. While $depth
  is not 0, every ResourceProxy of type "Metadata" is resolved and, when a
  document is found, turned into a nested c element. A c element with
  children gets cnt="-1" (filled in later by cmd-model:store-collection-data);
  a c element without children gets the resource count of its handle, or
  "-1" when the handle is empty.
:)
declare function cmd-model:recurse-collections($collection as node()+, $name as xs:string, $handle as xs:string, $proxy-id as xs:string, $depth as xs:integer) as item()* {
    let $children :=
        if ($depth ne 0) then $collection//ResourceProxy[ResourceType = "Metadata"] else ()
    return
        if (fn:empty($children)) then
            <c n="{$name}" handle="{$handle}" proxy-id="{$proxy-id}" cnt="{if ($handle eq "") then "-1" else cmd-model:get-resource-count($handle)}"></c>
        else
            let $child-results :=
                for $child in $children
                let $home := util:collection-name($child/root())
                (: proxies flagged with an unresolvable-uri child are not looked up :)
                let $child-doc :=
                    if (exists($child/unresolvable-uri)) then ()
                    else cmd-model:get-resource-by-handle-or-collection-path($home, $child/ResourceRef)
                let $child-name :=
                    if (exists($child-doc)) then cmd-model:get-md-collection-name($child-doc)
                    else concat($home, ":", $child/ResourceRef)
                where exists($child-doc)
                return
                    cmd-model:recurse-collections($child-doc, $child-name, $child-doc//Header/MdSelfLink, $child/@id, $depth - 1)
            return
                <c n="{$name}" handle="{$handle}" proxy-id="{$proxy-id}" cnt="-1" sub-colls="{count($child-results)}">{$child-results}</c>
};
---|
129 | |
---|
(:
  Count the CMD records in the mirror that have an IsPartOf equal to $handle
  and contain at least one ResourceType with the value "Resource".
  The count is returned as a string.
:)
declare function cmd-model:get-resource-count($handle as xs:string) as xs:string {
    let $members := collection($cmd-model:cmdiMirrorPath)//IsPartOf[. eq $handle]/ancestor::CMD
    let $with-resources := $members[descendant::ResourceType[. = "Resource"]]
    return
        xs:string(count($with-resources))
};
---|
135 | |
---|
(:
  Display name of a metadata record: the first hit, in this order of
  preference, of Corpus/Name, Session/Name, Collection/GeneralInfo/Name and
  Collection/GeneralInfo/Title anywhere below $collection-doc, or "UNKNOWN"
  when none of them exists.
:)
declare function cmd-model:get-md-collection-name($collection-doc as node()) as xs:string {
    let $candidates := (
        $collection-doc//Corpus/Name,
        $collection-doc//Session/Name,
        $collection-doc//Collection/GeneralInfo/Name,
        $collection-doc//Collection/GeneralInfo/Title
    )
    return
        ($candidates, "UNKNOWN")[1]
};
---|
141 | |
---|
(:
  Compute the Term summary for $type-name (via cmd-model:elem) and store it
  under the document name $name (via cmd-model:store-result).
  Returns whatever store-result returns.
:)
declare function cmd-model:create-doc($collections as xs:string+, $type-name as xs:string, $depth as xs:integer, $name as xs:string) as xs:string* {
    (: if newer data available :)
    let $summary := cmd-model:elem($collections, $type-name, $depth)
    return
        cmd-model:store-result($collections, $summary, $name, $depth)
    (:else () :)
};
---|
150 | |
---|
(:
  Return the cached result document for the path $type-name, the given
  collections and $depth, creating and storing it first when it is not yet
  available in $cmd-model:commonFreqsPath.

  The document name contains an MD5 hash of $type-name in addition to the
  depth and the hash of the collections. Without it, requests for different
  paths with the same collections and depth would share one cached document
  and the second request would receive the result of the first.
:)
declare function cmd-model:get-result-doc($collections as xs:string+, $type-name as xs:string, $depth as xs:integer) as item()* {
    (: the path may contain slashes and predicates, so only its hash goes into the name :)
    let $name-prefix := fn:concat("values-", util:hash($type-name, "MD5"), "-"),
        $name := cmd-model:make-compound-doc-name($collections, $name-prefix, xs:string($depth)),
        (: bound only for its side effect: creates the document when it is missing :)
        $dummy := if (cmd-model:is-result-available($cmd-model:commonFreqsPath, $name)) then
                ()
            else
                cmd-model:create-doc($collections, $type-name, $depth, $name)
    return
        cmd-model:get-doc($cmd-model:commonFreqsPath, $name)
};
---|
163 | |
---|
(:
  Generic document lookup: returns the document $doc-name from the
  database collection $collection.
:)
declare function cmd-model:get-doc($collection as xs:string, $doc-name as xs:string) as item()* {
    let $doc-uri := fn:concat($collection, "/", $doc-name)
    return
        fn:doc($doc-uri)
};
---|
170 | |
---|
171 | |
---|
(:
  Tells whether the result document $result-ref already exists in $collection.
:)
declare function cmd-model:is-result-available($collection as xs:string, $result-ref as xs:string) as xs:boolean {
    let $result-uri := fn:concat($collection, "/", $result-ref)
    return
        fn:doc-available($result-uri)
};
---|
178 | |
---|
(:
  Tells whether the document $doc-name exists in $collection.
:)
declare function cmd-model:is-doc-available($collection as xs:string, $doc-name as xs:string) as xs:boolean {
    let $doc-uri := fn:concat($collection, "/", $doc-name)
    return
        fn:doc-available($doc-uri)
};
---|
185 | |
---|
(:
  Store the calculated frequencies for reuse.
  Logs in with the write user read from /db/clarin/writer.xml, wraps
  $entries in a document element and stores it in
  $cmd-model:commonFreqsPath. Note that $type-name is used as the name of
  the stored document. Returns the result of xdb:store.
:)
declare function cmd-model:store-result($coll-names as xs:string+, $entries as element()*, $type-name as xs:string, $depth as xs:integer) as xs:string {
    let $credentials := fn:doc("/db/clarin/writer.xml")
    (: bound only for its side effect: the login has to happen before the store :)
    let $login := xdb:login($cmd-model:cmdiDatabaseURI, $credentials//write-user/text(), $credentials//write-user-cred/text())
    let $document := cmd-model:make-doc-element-of-type($type-name, $coll-names, $entries, xs:string($depth))
    return
        xdb:store($cmd-model:commonFreqsPath, $type-name, $document)
};
---|
197 | |
---|
(:
  Store the collection listing for given collection.

  After storing $data as $doc-name in $collection-path the stored document
  is updated in place (eXist update extension):
    - Collections/@count becomes the sum of @cnt over all c elements that
      have no c children;
    - every c element with c children and cnt="-1" gets the sum of @cnt
      over the childless c elements below it.
  The bindings below are evaluated for their side effects; their order matters.
:)
declare function cmd-model:store-collection-data($data as node(), $collection-path as xs:string, $doc-name as xs:string) as xs:string? {
    let $credentials := fn:doc("/db/clarin/writer.xml")
    let $login := xdb:login($cmd-model:cmdiDatabaseURI, $credentials//write-user/text(), $credentials//write-user-cred/text())
    (: a util:catch("org.exist.xquery.XPathException", ...) wrapper around the store was disabled in the original :)
    let $store := xdb:store($collection-path, $doc-name, $data)
    let $stored-doc := doc(concat($collection-path, "/", $doc-name))
    let $coll-count := update value $stored-doc/Collections/@count with sum($stored-doc//c[not(c)]/@cnt)
    return
        for $parent in $stored-doc//c[c][@cnt eq "-1"]
        return update value $parent/@cnt with sum($parent//c[not(c)]/@cnt)
};
---|
212 | |
---|
(:
  Create the document name for a type: type name, depth and the xml
  extension, prefixed with the collection path and a slash when $incl-path
  is true.
:)
declare function cmd-model:make-doc-name($coll-name as xs:string?, $type-name as xs:string, $depth as xs:string, $incl-path as xs:boolean) as xs:string {
    let $path-prefix := if ($incl-path) then fn:concat($coll-name, "/") else ""
    return
        fn:concat($path-prefix, $type-name, $depth, $cmd-model:xmlExt)
};
---|
224 | |
---|
(:
  Create a document name for a selection of collections (or types) so the
  document can be reused: type name, depth, a dash, the MD5 hash of the
  sorted and concatenated collection names, and the xml extension.
:)
declare function cmd-model:make-compound-doc-name($coll-names as xs:string+, $type-name as xs:string, $depth as xs:string) as xs:string {
    (: sorting makes the name independent of the order in which collections are given :)
    let $ordered := for $coll in $coll-names order by $coll ascending return $coll
    let $digest := util:hash(string-join($ordered, ""), "MD5")
    return
        fn:concat($type-name, $depth, "-", $digest, $cmd-model:xmlExt)
};
---|
234 | |
---|
(:
  Create an element of the given type.

  $type-name          name of the element to create
  $count              value of the count attribute
  $text-count         value of the text-count attribute
  $text-types-count   value of the text-types-count attribute
  $value              text content of the element

  The attributes are filled from the function parameters; the previous body
  referred to variables that are not declared anywhere in the function.
:)
declare function cmd-model:make-element-of-type($type-name as xs:string, $count as xs:string, $text-count as xs:string, $text-types-count as xs:string, $value as xs:string) as element() {
    element {$type-name} {
        attribute count {$count},
        attribute text-count {$text-count},
        attribute text-types-count {$text-types-count},
        text {$value}
    }
};
---|
247 | |
---|
(:
  Create a document element of the given type.
  The element name comes from cmd-model:get-doc-type-element-name. It
  carries a depth attribute, a colls attribute with the comma-separated
  collection names (omitted when none are given) and a created attribute
  with the current date and time, followed by $entries as content.
:)
declare function cmd-model:make-doc-element-of-type($type-name as xs:string, $coll-names as xs:string*, $entries as element()*, $depth as xs:string) as element() {
    element {cmd-model:get-doc-type-element-name($type-name)} {
        attribute depth {$depth},
        if (fn:exists($coll-names)) then attribute colls {fn:string-join($coll-names, ",")} else (),
        attribute created {fn:current-dateTime()},
        $entries
    }
};
---|
262 | |
---|
(:
  Element name for the document of a type.
  The name is currently the same for every type: $type-name is not used
  and the value of $cmd-model:docTypeTerms is returned.
:)
declare function cmd-model:get-doc-type-element-name($type-name as xs:string) as xs:string {
    let $element-name := $cmd-model:docTypeTerms
    return
        $element-name
};
---|
269 | |
---|
270 | |
---|
(:
  Get the resource by handle or by path.

  - When $id starts with "test-" or "clarin-", or $doc-name starts with
    "clarin-", the CMD records whose MdSelfLink equals the decoded
    $doc-name (or $id when no $doc-name is given) are returned.
  - Otherwise, when no $doc-name is given or $id is the root marker, the
    CMD records that are part of the root collection are returned.
  - Otherwise the document $doc-name in the database collection $id.
:)
declare function cmd-model:get-resource-by-handle-or-collection-path($id as xs:string, $doc-name as xs:string?) as node()* {
    let $is-handle-lookup :=
        starts-with($id, "test-") or starts-with($id, "clarin-") or starts-with($doc-name, "clarin-")
    return
        if ($is-handle-lookup) then
            collection($cmd-model:cmdiMirrorPath)//MdSelfLink[. = xdb:decode(($doc-name, $id)[1])]/ancestor::CMD
            (: disabled full-text alternative kept from the original:
               let $key := xdb:decode(($doc-name, $id)[1])
               return ft:query(//MdSelfLink, <term>{$key}</term>)[matches(., concat("^", $key, "$"))]/ancestor::CMD :)
        else if ($id = $cmd-model:collectionRoot or empty($doc-name)) then
            collection($cmd-model:cmdiMirrorPath)//IsPartOf[. = $cmd-model:collectionRoot]/ancestor::CMD
        else
            doc(concat($id, "/", $doc-name))
};
---|
284 | |
---|
(:
  Serialisation format.
  For the json format the exist:serialize option is switched to text output
  with media type application/json; the item itself is returned unchanged
  in every case (the conversion to JSON is disabled).
:)
declare function cmd-model:serialise-as($item as node()?, $format as xs:string) as item()? {
    if ($format ne $cmd-model:responseFormatJSon) then
        (: $cmd-model:responseFormatXml, $cmd-model:responseFormatText :)
        $item
    else
        (: bound only for its side effect on the serialisation options :)
        let $option := util:declare-option("exist:serialize", "method=text media-type=application/json")
        return
            (: json:xml-to-json($item) :) $item
};
---|
296 | |
---|
297 | |
---|
(:~
  API function queryModel.
  Fetches (or creates) the result document for $cmd-index-path in the given
  collections, down to $max-depth, and passes it through
  cmd-model:serialise-as.
:)
declare function cmd-model:query-model($cmd-index-path as xs:string, $collection as xs:string+, $format as xs:string, $max-depth as xs:integer) as item()? {
    let $result-doc := cmd-model:get-result-doc($collection, $cmd-index-path, $max-depth)
    return
        cmd-model:serialise-as($result-doc, $format)
};
---|
304 | |
---|
(:~
  API function getCollections.
  Returns the collection listing for $collections down to $max-depth. The
  listing is cached as a document in $cmd-model:commonFreqsPath; when it is
  not there yet it is built with cmd-model:recurse-collections and stored
  with cmd-model:store-collection-data.
:)
declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
    let $target-path := $cmd-model:commonFreqsPath
    let $listing-name := cmd-model:make-compound-doc-name($collections, "collection", xs:string($max-depth))
    (: bound only for its side effect: builds and stores the listing when it is missing :)
    let $dummy :=
        if (cmd-model:is-doc-available($target-path, $listing-name)) then
            ()
        else
            let $children :=
                for $collection-item in $collections,
                    $collection-doc in cmd-model:get-resource-by-handle-or-collection-path($collection-item, ())
                return
                    cmd-model:recurse-collections($collection-doc, cmd-model:get-md-collection-name($collection-doc), $collection-doc//MdSelfLink, "", $max-depth)
            return
                cmd-model:store-collection-data(<Collections count="-1" sub-colls="{count($children)}" root="{$collections}">{$children}</Collections>, $target-path, $listing-name)
    return
        cmd-model:serialise-as(cmd-model:get-doc($target-path, $listing-name), $format)
};
---|
324 | |
---|
(:~
  API function searchRetrieve.

  Evaluates the (decoded) query, which is expected to be an XPath step
  sequence starting with a slash, against the CMDI mirror and returns a
  searchRetrieveResponse with the total number of matching CMD records, an
  echo of the request, the size of the returned slice and the slice itself.
  For collections other than the root marker the records are restricted to
  those having an IsPartOf equal to the decoded collection handle.

  SECURITY: $cql-query is concatenated into a query string and run through
  util:eval without any validation, so it can execute arbitrary XQuery.
  This is how the function is designed; it must only be exposed to trusted
  callers.
:)
declare function cmd-model:search-retrieve($cql-query as xs:string, $collections as xs:string+, $format as xs:string, $start-item as xs:integer, $end-item as xs:integer) as item()* {
    (: $collection is referenced by name inside the evaluated query strings, so it must keep this name. :)
    let $collection := collection($cmd-model:cmdiMirrorPath),
        $results :=
            if ($collections[1] eq $cmd-model:collectionRoot) then
                util:eval(fn:concat("$collection", xdb:decode($cql-query), "/ancestor::CMD"))
            else
                for $coll in $collections
                (: The handle ends up inside a single-quoted string literal of the evaluated
                   query: an ampersand must become an entity reference and an apostrophe must
                   be doubled, otherwise the literal is malformed or ends early. :)
                let $coll-literal := fn:replace(fn:replace(xdb:decode($coll), "&amp;", "&amp;amp;"), "'", "''")
                return util:eval(fn:concat("$collection", xdb:decode($cql-query), "/ancestor::CMD[descendant::IsPartOf = '", $coll-literal, "']"))

    let $result-count := fn:count($results),
        (: NOTE(review): the third argument of fn:subsequence is a length, not an end
           position, so $end-item acts as the maximum number of records - confirm
           that callers pass it with that meaning. :)
        $result-seq := fn:subsequence($results, $start-item, $end-item),
        $seq-count := fn:count($result-seq),
        $result-fragment :=
            <searchRetrieveResponse>
                <numberOfRecords>{$result-count}</numberOfRecords>
                <echoedSearchRetrieveRequest>{$cql-query, $collections, $start-item, $end-item}</echoedSearchRetrieveRequest>
                <diagnostics>{$seq-count}</diagnostics>
                <records>
                    {$result-seq}
                </records>
            </searchRetrieveResponse>

    return
        cmd-model:serialise-as($result-fragment, $format)

};
---|
353 | (: |
---|
354 | {cmdComponent} //{cmdComponent} Actor //Actor |
---|
355 | {cmdPath}. //{cmdPath}/{cmdComponent} Actor.Contact.Phone //Actor/Contact/Phone |
---|
356 | {cmdIndex} {rel} {term} //{cmdIndex}[. {rel} '{term}'] Actors.Actor.Sex=f //Actors/Actor/Sex[.='f'] |
---|
357 | {cmdIndex} any {term} //{cmdIndex}[contains(., '{term}')] Organisation.Name any University //Organisation/Name[contains(.,'University')] |
---|
358 | and, or, and not ?! Organisation.Name any University and Actor.gender=m ?! |
---|
359 | |
---|
360 | //MDGroup[Actors/Actor/Role[.='sponsor'] and Actors/Actor/Name[contains(.,'a')]] |
---|
361 | //Title[starts-with(.,'a')] |
---|
362 | //Title[starts-with(.,'A')] |
---|
363 | //Title[contains(.,'analysis')] |
---|
364 | http://demo.spraakdata.gu.se/clarin/cmd/model/stats?operation=searchRetrieve&query=//Title[contains(.,'analysis')]&collection= |
---|
365 | |
---|
366 | <record> |
---|
367 | <recordSchema>info:srw/schema/1/dc-v1.1</recordSchema> |
---|
368 | <recordPacking>xml</recordPacking> |
---|
369 | <recordData> |
---|
370 | <srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-v1.1"> |
---|
371 | <dc:title>This is a Sample Record</dc:title> |
---|
372 | </srw_dc:dc> |
---|
373 | </recordData> |
---|
374 | <recordPosition>1</recordPosition> |
---|
375 | <extraRecordData> |
---|
376 | <rel:score xmlns:rel="info:srw/extensions/2/rel-1.0"> |
---|
377 | 0.965 |
---|
378 | </rel:score> |
---|
379 | </extraRecordData> |
---|
380 | </record> |
---|
381 | |
---|
382 | <searchRetrieveResponse> |
---|
383 | <numberOfRecords>integer</numberOfRecords> |
---|
384 | <echoedSearchRetrieveRequest>query itself (together with the context-collection) </echoedSearchRetrieveRequest> |
---|
385 | <diagnostics>if necessary</diagnostics> |
---|
386 | <records> |
---|
387 | .... |
---|
388 | </records> |
---|
389 | </searchRetrieveResponse> |
---|
390 | |
---|
391 | :) |
---|