source: MDRepository/trunk/xquery/cmd-model.xqm @ 726

Last change on this file since 726 was 726, checked in by vronk, 14 years ago

fixing bug in search-retrieve
whole cmdi-mirror injected in the response header with the var $collection (instead of $collections)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 15.7 KB
Line 
1module namespace cmd-model = "http://spraakbanken.gu.se/clarin/xquery/model";
2
3(:
4 $Id: cmd-model.xqm 726 2010-09-28 11:31:03Z vronk $
5:)
6
7import module namespace xdb="http://exist-db.org/xquery/xmldb";
8import module namespace util="http://exist-db.org/xquery/util";
9
(: Module-wide constants. :)
(: XML:DB URI handed to xdb:login before results are stored. :)
10declare variable $cmd-model:cmdiDatabaseURI as xs:string := "xmldb:exist:///db";
11
(: Collection in which computed result documents are stored and looked up. :)
12declare variable $cmd-model:commonFreqsPath as xs:string := "/db/common/clarin/freqs";
(: Collection holding the CMD records that all queries in this module run against. :)
13declare variable $cmd-model:cmdiMirrorPath as xs:string := "/db/cmdi-mirror";
14
(: Names of the three API operations; not referenced inside this module -
   presumably used by the calling script to dispatch requests (TODO confirm). :)
15declare variable $cmd-model:getCollections as xs:string := "getCollections";
16declare variable $cmd-model:queryModel as xs:string := "queryModel";
17declare variable $cmd-model:searchRetrieve as xs:string := "searchRetrieve";
18
(: Path fragments for Actor components; not referenced inside this module. :)
19declare variable $cmd-model:typeActorPath as xs:string := "MDGroup/Actors/Actor";
20declare variable $cmd-model:typeActorPath0 as xs:string := "Actor";
21declare variable $cmd-model:typeActorRolePath as xs:string := "MDGroup/Actors/Actor/Role";
22
(: docTypeTerms is the element name returned by cmd-model:get-doc-type-element-name;
   docTypeSuffix is not referenced inside this module. :)
23declare variable $cmd-model:docTypeTerms as xs:string := "Terms";
24declare variable $cmd-model:docTypeSuffix as xs:string := "Values";
25
(: Response format identifiers; cmd-model:serialise-as only tests for the JSON one. :)
26declare variable $cmd-model:responseFormatXml as xs:string := "xml";
27declare variable $cmd-model:responseFormatJSon as xs:string := "json";
28declare variable $cmd-model:responseFormatText as xs:string := "text";
29
(: Not referenced inside this module. :)
30declare variable $cmd-model:collectionDocName as xs:string := "collection.xml";
31
(: Pseudo collection id: when given, queries run over the whole mirror
   instead of being restricted to a collection. :)
32declare variable $cmd-model:collectionRoot as xs:string := "root";
33
(: Extension appended to generated document names. :)
34declare variable $cmd-model:xmlExt as xs:string := ".xml";
35
(: Maximum number of <v> entries returned by cmd-model:values; 0 disables the limit. :)
36declare variable $cmd-model:valuesLimit as xs:integer := 100;
37
38
39
(:~
  Entry point for building the <Term> summary of $path.
  Starts the recursion in cmd-model:elem-r with the remaining depth set to
  the maximum depth.
  fixme (carried over from the original): multiple collections are not
  treated specially here.

  @param $collections collection ids to restrict the statistics to
  @param $path        element path (without leading "//") to summarise
  @param $depth       maximum number of levels to descend
  @return the <Term> element produced by cmd-model:elem-r
:)
declare function cmd-model:elem($collections as xs:string+, $path as xs:string, $depth as xs:integer) as element() {
  let $max-depth := $depth
  return cmd-model:elem-r($collections, $path, $max-depth, $depth)
};
44
(:~
  Recursive worker behind cmd-model:elem.
  Evaluates $path against the CMD mirror, counts the matching nodes and
  their text nodes, and emits a <Term> element. While $depth is greater
  than 0 it recurses into every distinct child element name; when
  $max-depth is 1 it also appends the value list from cmd-model:values.
  At depth 0 the <Term> contains the literal text 'maxdepth'.

  @param $collections collection ids; if the first one is the root id the
                      whole mirror is queried
  @param $path        element path (without leading "//")
  @param $max-depth   depth originally requested by the caller
  @param $depth       remaining depth for this level of the recursion
  @return a <Term> element with count attributes and nested <Term>/<v> children
:)
declare function cmd-model:elem-r($collections as xs:string+, $path as xs:string, $max-depth as xs:integer, $depth as xs:integer) as element() {
  (: NOTE: $collection is referenced by name inside the util:eval strings
     below, so it must keep this exact name. :)
  let $collection := collection($cmd-model:cmdiMirrorPath)
  (: SECURITY NOTE(review): $path and the decoded collection id are spliced
     unescaped into an expression that is handed to util:eval. Callers must
     not pass untrusted input here without validating it first. :)
  let $path-nodes :=
    if ($collections[1] eq $cmd-model:collectionRoot) then
      util:eval(fn:concat("$collection//", $path))
    else
      (: NOTE(review): unlike the root branch this expression ends in
         /ancestor::CMD, so it yields CMD records rather than the nodes at
         $path - confirm that this is intended. :)
      for $coll in $collections
      return util:eval(fn:concat("$collection//ft:query(descendant::IsPartOf, <query><term>", xdb:decode($coll), "</term></query>)//", $path, "/ancestor::CMD"))
  let $path-count := count($path-nodes)
  let $subs := distinct-values($path-nodes/child::element()/name())
  let $text-nodes := $path-nodes//text()
  let $text-count := count($text-nodes)
  let $text-count-distinct := count(distinct-values($text-nodes))
  return
    <Term path="{fn:concat("//", $path)}" name="{text:groups($path, "/([^/]+)$")[last()]}" count="{$path-count}" count_text="{$text-count}" count_distinct_text="{$text-count-distinct}">{
      if ($depth > 0) then
        (
          for $elname in $subs[. != '']
          return
            (: Fix: pass the collection ids ($collections) on to the next
               level. The original passed $collection, i.e. the document
               nodes of the whole mirror, which do not fit the xs:string+
               parameter. :)
            cmd-model:elem-r($collections, concat($path, '/', $elname), $max-depth, $depth - 1),
          if ($max-depth eq 1) then cmd-model:values($path-nodes) else ()
        )
      else 'maxdepth'
    }</Term>
};
69
(:~
  Builds the list of distinct text values found directly under $nodes.
  Each value becomes a <v key="..." cnt="..."/> element, where cnt is the
  number of nodes in $nodes whose value equals the key. Entries are ordered
  case-insensitively by key and cut off after $cmd-model:valuesLimit
  entries, unless that limit is 0.

  @param $nodes nodes whose text children are tallied
  @return sequence of <v> elements
:)
declare function cmd-model:values($nodes as node()*) as node()* {
  let $limit := $cmd-model:valuesLimit
  let $entries :=
    for $value in distinct-values($nodes/text())
    let $occurrences := count($nodes[. eq $value])
    order by lower-case($value) ascending
    return <v key="{$value}" cnt="{$occurrences}"/>
  return
    if ($limit ne 0) then
      subsequence($entries, 1, $limit)
    else
      $entries
};
81
(:~
  For every item in $n emits a <Term> element named after the item and
  containing the result of util:node-xpath for its parent element (nothing
  if the item has no parent element).

  @param $n nodes to describe
  @return one <Term> element per input node
:)
declare function cmd-model:paths($n) {
  for $node in $n
  let $parent-xpaths :=
    for $parent in $node/parent::element()
    return util:node-xpath($parent)
  return <Term name="{$node/name()}">{$parent-xpaths}</Term>
};
89
90(:
91
92:)
(:~
  Walks the database collection tree below $collection and calls
  cmd-model:create-doc for $collection and for every descendant collection.

  @param $collection database collection path to start from
  @param $type-name  element path handed on to cmd-model:create-doc
  @param $depth      depth handed on to cmd-model:create-doc
  @param $name       document name handed on to cmd-model:create-doc
  @return the create-doc result for this collection followed by the results
          of all descendant collections
:)
declare function cmd-model:recurse-collections-model($collection as xs:string, $type-name as xs:string, $depth as xs:integer, $name as xs:string) as item()* {
  (: Descendants are bound before the collection itself, matching the
     original binding order of the two store operations. :)
  let $descendant-results :=
    for $sub in xdb:get-child-collections($collection)
    let $sub-path := fn:concat($collection, '/', xs:string($sub))
    return cmd-model:recurse-collections-model($sub-path, $type-name, $depth, $name)
  let $own-result := cmd-model:create-doc($collection, $type-name, $depth, $name)
  return ($own-result, $descendant-results)
};
106
107(:
108  Recurse for collections
109:)
(:~
  Builds the nested <c> tree describing a metadata collection.
  While $depth is not 0, every ResourceProxy of type "Metadata" below
  $collection is resolved through
  cmd-model:get-resource-by-handle-or-collection-path and descended into
  with $depth - 1. Proxies carrying an unresolvable-uri child, or resolving
  to nothing, are skipped.

  A node with children gets cnt="-1" and sub-colls set to the number of
  child results. A leaf gets cnt from cmd-model:get-resource-count, or
  "-1" when $handle is the empty string.

  @param $collection document node(s) of the collection
  @param $name       value for the n attribute
  @param $handle     value for the handle attribute
  @param $proxy-id   value for the proxy-id attribute
  @param $depth      remaining depth; 0 stops the descent
  @return a <c> element
:)
declare function cmd-model:recurse-collections($collection as node()+, $name as xs:string, $handle as xs:string, $proxy-id as xs:string, $depth as xs:integer) as item()* {
  let $metadata-proxies :=
    if ($depth ne 0) then
      $collection//ResourceProxy[ResourceType = "Metadata"]
    else
      ()
  return
    if (fn:empty($metadata-proxies)) then
      <c n="{$name}" handle="{$handle}" proxy-id="{$proxy-id}" cnt="{if ($handle eq "") then "-1" else cmd-model:get-resource-count($handle)}"></c>
    else
      let $nested :=
        for $proxy in $metadata-proxies
        let $resolved :=
          if (exists($proxy/unresolvable-uri)) then
            ()
          else
            cmd-model:get-resource-by-handle-or-collection-path(util:collection-name($proxy/root()), $proxy/ResourceRef)
        let $label :=
          if (exists($resolved)) then
            cmd-model:get-md-collection-name($resolved)
          else
            concat(util:collection-name($proxy/root()), ":", $proxy/ResourceRef)
        return
          if (exists($resolved)) then
            cmd-model:recurse-collections($resolved, $label, $resolved//Header/MdSelfLink, $proxy/@id, $depth - 1)
          else
            ()
      return
        <c n="{$name}" handle="{$handle}" proxy-id="{$proxy-id}" cnt="-1" sub-colls="{count($nested)}">{$nested}</c>
};
129
130(:
131:)
(:~
  Counts the CMD records in the mirror that have an IsPartOf equal to
  $handle and contain at least one ResourceType with the value "Resource".

  @param $handle collection handle to match against IsPartOf
  @return the count, as a string
:)
declare function cmd-model:get-resource-count($handle as xs:string) as xs:string {
  let $members := collection($cmd-model:cmdiMirrorPath)//IsPartOf[. eq $handle]/ancestor::CMD
  let $with-resources := $members[descendant::ResourceType[. = "Resource"]]
  return xs:string(count($with-resources))
};
135
136(:
137:)
(:~
  Picks a display name for a metadata record. Candidates are tried in this
  order: Corpus/Name, Session/Name, Collection/GeneralInfo/Name,
  Collection/GeneralInfo/Title. If none exists the literal "UNKNOWN" is
  returned.

  @param $collection-doc record to take the name from
  @return the first available name
:)
declare function cmd-model:get-md-collection-name($collection-doc as node()) as xs:string {
  let $candidates := (
    $collection-doc//Corpus/Name,
    $collection-doc//Session/Name,
    $collection-doc//Collection/GeneralInfo/Name,
    $collection-doc//Collection/GeneralInfo/Title,
    "UNKNOWN"
  )
  return $candidates[1]
};
141
142(:
143
144:)
(:~
  Computes the <Term> tree for $type-name via cmd-model:elem and stores it
  under $name via cmd-model:store-result. The result is always recomputed;
  no freshness check is made.

  @param $collections collection ids the statistics are restricted to
  @param $type-name   element path to summarise
  @param $depth       maximum depth of the summary
  @param $name        document name to store the result under
  @return whatever cmd-model:store-result returns
:)
declare function cmd-model:create-doc($collections as xs:string+, $type-name as xs:string, $depth as xs:integer, $name as xs:string) as xs:string* {
  let $term-tree := cmd-model:elem($collections, $type-name, $depth)
  return cmd-model:store-result($collections, $term-tree, $name, $depth)
};
150
151(:
152
153:)
(:~
  Returns the cached statistics document for the given collections, path
  and depth, creating it first if it does not exist yet.

  Fix: the cache document name now includes a hash of $type-name. The
  original name depended only on the collections and the depth, so a
  request for a different path with the same collections and depth was
  answered with the document cached for the first path.
  Documents cached under the old naming scheme are no longer looked up.

  @param $collections collection ids the statistics are restricted to
  @param $type-name   element path to summarise
  @param $depth       maximum depth of the summary
  @return the cached document from $cmd-model:commonFreqsPath
:)
declare function cmd-model:get-result-doc($collections as xs:string+, $type-name as xs:string, $depth as xs:integer) as item()* {
  (: The hash keeps the path out of the file name while still making the
     name unique per path. :)
  let $cache-prefix := fn:concat("values-", util:hash($type-name, "MD5"), "-")
  let $name := cmd-model:make-compound-doc-name($collections, $cache-prefix, xs:string($depth))
  (: Bound only for its side effect: fills the cache when it is empty. :)
  let $ensure-cached :=
    if (cmd-model:is-result-available($cmd-model:commonFreqsPath, $name)) then
      ()
    else
      cmd-model:create-doc($collections, $type-name, $depth, $name)
  return
    cmd-model:get-doc($cmd-model:commonFreqsPath, $name)
};
163
164(:
165 Generic get-doc(collection, docname)
166:)
(:~
  Loads the document $doc-name from the database collection $collection.

  @param $collection database collection path
  @param $doc-name   name of the document inside the collection
  @return the result of fn:doc for "$collection/$doc-name"
:)
declare function cmd-model:get-doc($collection as xs:string, $doc-name as xs:string) as item()* {
  let $uri := fn:concat($collection, "/", $doc-name)
  return fn:doc($uri)
};
170
171
172(:
173  Function for telling whether the result is already available or not.
174:)
(:~
  Tells whether the result document $result-ref exists in $collection.
  Same check as cmd-model:is-doc-available, to which it delegates.

  @param $collection database collection path
  @param $result-ref name of the result document
  @return true if the document is available
:)
declare function cmd-model:is-result-available($collection as xs:string, $result-ref as xs:string) as xs:boolean {
  cmd-model:is-doc-available($collection, $result-ref)
};
178
179(:
180  Function for telling whether the document is available or not.
181:)
(:~
  Tells whether the document $doc-name exists in $collection.

  @param $collection database collection path
  @param $doc-name   name of the document inside the collection
  @return the result of fn:doc-available for "$collection/$doc-name"
:)
declare function cmd-model:is-doc-available($collection as xs:string, $doc-name as xs:string) as xs:boolean {
  let $uri := fn:concat($collection, "/", $doc-name)
  return fn:doc-available($uri)
};
185
186(:
187  Store the calculated frequencies for reuse.
188  If more than one collection is given the result is stored in the common
189  collection for reuse.
190:)
(:~
  Wraps $entries in a document element and stores it in
  $cmd-model:commonFreqsPath under the name given in $type-name.
  Logs in first with the user and credential read from
  /db/clarin/writer.xml.

  @param $coll-names collection ids recorded on the document element
  @param $entries    content of the document element
  @param $type-name  name the document is stored under; also handed to
                     cmd-model:make-doc-element-of-type
  @param $depth      depth recorded on the document element
  @return the result of xdb:store
:)
declare function cmd-model:store-result($coll-names as xs:string+, $entries as element()*, $type-name as xs:string, $depth as xs:integer) as xs:string {
  let $credentials := fn:doc("/db/clarin/writer.xml")
  (: Bound only for its side effect; the login result is not inspected. :)
  let $logged-in := xdb:login($cmd-model:cmdiDatabaseURI, $credentials//write-user/text(), $credentials//write-user-cred/text())
  let $document := cmd-model:make-doc-element-of-type($type-name, $coll-names, $entries, xs:string($depth))
  return xdb:store($cmd-model:commonFreqsPath, $type-name, $document)
};
197
198(:
199  Store the collection listing for given collection.
200:)
(:~
  Stores the collection listing $data as $doc-name in $collection-path and
  then fills in the aggregate counts inside the stored document:
  Collections/@count becomes the sum of @cnt over all leaf <c> elements,
  and every non-leaf <c> with cnt="-1" gets the sum of @cnt over its own
  leaf descendants.
  Logs in first with the user and credential read from
  /db/clarin/writer.xml.

  @param $data            collection listing to store
  @param $collection-path database collection to store into
  @param $doc-name        document name to store under
  @return the results of the per-node update expressions
:)
declare function cmd-model:store-collection-data($data as node(), $collection-path as xs:string, $doc-name as xs:string) as xs:string? {
  let $credentials := fn:doc("/db/clarin/writer.xml")
  (: The bindings below exist for their side effects and must stay in this
     order: login, store, re-read, update total. :)
  let $logged-in := xdb:login($cmd-model:cmdiDatabaseURI, $credentials//write-user/text(), $credentials//write-user-cred/text())
  let $stored-path := xdb:store($collection-path, $doc-name, $data)
  let $stored-doc := doc(concat($collection-path, "/", $doc-name))
  let $total-updated := update value $stored-doc/Collections/@count with sum($stored-doc//c[not(c)]/@cnt)
  return
    for $parent in $stored-doc//c[c][@cnt eq "-1"]
    return update value $parent/@cnt with sum($parent//c[not(c)]/@cnt)
};
212
213(:
214  Create document name for type () with or without collection path.
215:)
(:~
  Builds a document name from type name, depth and the XML extension,
  optionally prefixed with the collection path.

  @param $coll-name collection path used as prefix when $incl-path is true
  @param $type-name first part of the document name
  @param $depth     second part of the document name
  @param $incl-path whether to prefix the name with "$coll-name/"
  @return the document name
:)
declare function cmd-model:make-doc-name($coll-name as xs:string?, $type-name as xs:string, $depth as xs:string, $incl-path as xs:boolean) as xs:string {
  let $file-name := fn:concat($type-name, $depth, $cmd-model:xmlExt)
  return
    if (not($incl-path)) then
      $file-name
    else
      fn:concat($coll-name, "/", $file-name)
};
224
225(:
226  Create document name with md5-hash for selected collections (or types) for reuse.
227:)
(:~
  Builds a reusable document name for a set of collections:
  "$type-name$depth-<MD5 of the sorted, concatenated names>.xml".
  Sorting makes the name independent of the order of $coll-names.

  NOTE(review): the names are joined without a separator, so different
  name lists whose concatenation is identical map to the same document.

  @param $coll-names collection (or type) names to hash
  @param $type-name  prefix of the document name
  @param $depth      depth, appended to the prefix
  @return the document name
:)
declare function cmd-model:make-compound-doc-name($coll-names as xs:string+, $type-name as xs:string, $depth as xs:string) as xs:string {
  let $ordered :=
    for $entry in $coll-names
    order by $entry ascending
    return $entry
  let $digest := util:hash(string-join($ordered, ""), "MD5")
  return fn:concat($type-name, $depth, "-", $digest, $cmd-model:xmlExt)
};
234
235(:
236  Create an element of the given type.
237:)
(:~
  Creates an element named $type-name carrying the three counts as
  attributes and $value as its text content.

  Fix: the original body referenced the undeclared variables $freq, $rank
  and $text-types. The attributes are now filled from the parameters they
  are named after.

  @param $type-name        name of the element to create
  @param $count            value of the count attribute
  @param $text-count       value of the text-count attribute
  @param $text-types-count value of the text-types-count attribute
  @param $value            text content of the element
  @return the constructed element
:)
declare function cmd-model:make-element-of-type($type-name as xs:string, $count as xs:string, $text-count as xs:string, $text-types-count as xs:string, $value as xs:string) as element() {
  element {$type-name} {
    attribute count {$count},
    attribute text-count {$text-count},
    attribute text-types-count {$text-types-count},
    text {$value}
  }
};
247
248(:
249  Create a document element of the given type.
250:)
(:~
  Creates the document element for a stored result. The element name comes
  from cmd-model:get-doc-type-element-name. It carries a depth attribute, a
  colls attribute with the comma-separated collection names (omitted when
  there are none), a created attribute with the current date-time, and
  $entries as content.

  @param $type-name  type name used to determine the element name
  @param $coll-names collection names recorded in the colls attribute
  @param $entries    content of the element
  @param $depth      value of the depth attribute
  @return the constructed document element
:)
declare function cmd-model:make-doc-element-of-type($type-name as xs:string, $coll-names as xs:string*, $entries as element()*, $depth as xs:string) as element() {
  element {cmd-model:get-doc-type-element-name($type-name)} {
    attribute depth {$depth},
    if (fn:exists($coll-names)) then
      attribute colls {fn:string-join($coll-names, ",")}
    else
      (),
    attribute created {fn:current-dateTime()},
    $entries
  }
};
262
263(:
264  Create the element name for a document of the given type.
265:)
(:~
  Returns the element name used for result documents. The name is the same
  for every type: $type-name is accepted but not consulted.

  @param $type-name type name (currently unused)
  @return the value of $cmd-model:docTypeTerms
:)
declare function cmd-model:get-doc-type-element-name($type-name as xs:string) as xs:string {
  let $element-name := $cmd-model:docTypeTerms
  return $element-name
};
269
270
271(:
272  Get the resource by handle or by path.
273:)
(:~
  Resolves a metadata record in one of three ways:
  1. If $id starts with "test-" or "clarin-", or $doc-name starts with
     "clarin-", the value ($doc-name if present, otherwise $id) is decoded
     and looked up as MdSelfLink in the mirror.
  2. Otherwise, if $doc-name is empty or $id is the root id, all records
     whose IsPartOf equals the root id are returned.
  3. Otherwise the document "$id/$doc-name" is loaded directly.

  @param $id       handle, root id or database collection path
  @param $doc-name handle or document name; may be empty
  @return the matching CMD element(s) or document
:)
declare function cmd-model:get-resource-by-handle-or-collection-path($id as xs:string, $doc-name as xs:string?) as node()* {
  let $is-handle :=
    starts-with($id, "test-") or starts-with($id, "clarin-") or starts-with($doc-name, "clarin-")
  let $is-root := empty($doc-name) or $id = $cmd-model:collectionRoot
  return
    if ($is-handle) then
      let $key := xdb:decode(($doc-name, $id)[1])
      return collection($cmd-model:cmdiMirrorPath)//MdSelfLink[. = $key]/ancestor::CMD
    else if ($is-root) then
      collection($cmd-model:cmdiMirrorPath)//IsPartOf[. = $cmd-model:collectionRoot]/ancestor::CMD
    else
      doc(concat($id, "/", $doc-name))
};
284
285(:
286  Serialisation format.
287:)
(:~
  Prepares $item for the requested response format. For the JSON format
  the exist:serialize option is switched to text output with media type
  application/json. The item itself is returned unchanged for every
  format, because the XML-to-JSON conversion is not enabled.

  @param $item   node to return
  @param $format requested response format
  @return $item
:)
declare function cmd-model:serialise-as($item as node()?, $format as xs:string) as item()? {
  let $wants-json := $format eq $cmd-model:responseFormatJSon
  (: Bound only for its side effect on the serialization options. :)
  let $option :=
    if ($wants-json) then
      util:declare-option("exist:serialize", "method=text media-type=application/json")
    else
      ()
  return $item
};
296
297
298(:~
299  API function queryModel.
300:)
(:~
  API function queryModel: returns the (cached) statistics document for
  $cmd-index-path within $collection, serialised for $format.

  @param $cmd-index-path element path to summarise
  @param $collection     collection ids to restrict the statistics to
  @param $format         requested response format
  @param $max-depth      maximum depth of the summary
  @return the result of cmd-model:serialise-as
:)
declare function cmd-model:query-model($cmd-index-path as xs:string, $collection as xs:string+, $format as xs:string, $max-depth as xs:integer) as item()? {
  let $model-doc := cmd-model:get-result-doc($collection, $cmd-index-path, $max-depth)
  return cmd-model:serialise-as($model-doc, $format)
};
304
305(:~
306  API function getCollections.
307:)
(:~
  API function getCollections: returns the (cached) collection listing for
  $collections down to $max-depth, serialised for $format.
  If no cached document exists yet, every record resolved for each
  collection id is expanded with cmd-model:recurse-collections and the
  resulting <Collections> document is stored via
  cmd-model:store-collection-data.

  @param $collections collection ids to list
  @param $format      requested response format
  @param $max-depth   maximum nesting depth of the listing
  @return the result of cmd-model:serialise-as
:)
declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
  let $target-collection := $cmd-model:commonFreqsPath
  let $target-doc := cmd-model:make-compound-doc-name($collections, "collection", xs:string($max-depth))
  (: Bound only for its side effect: fills the cache when it is empty. :)
  let $ensure-cached :=
    if (cmd-model:is-doc-available($target-collection, $target-doc)) then
      ()
    else
      let $trees :=
        for $root-id in $collections,
            $root-doc in cmd-model:get-resource-by-handle-or-collection-path($root-id, ())
        return
          cmd-model:recurse-collections($root-doc, cmd-model:get-md-collection-name($root-doc), $root-doc//MdSelfLink, "", $max-depth)
      return
        cmd-model:store-collection-data(<Collections count="-1" sub-colls="{count($trees)}" root="{$collections}">{$trees}</Collections>, $target-collection, $target-doc)
  return
    cmd-model:serialise-as(cmd-model:get-doc($target-collection, $target-doc), $format)
};
324
325(:~
326  API function searchRetrieve.
327:)
(:~
  API function searchRetrieve: evaluates the (URL-encoded) query against
  the CMD mirror and returns a <searchRetrieveResponse> containing the
  total number of matching records, the echoed request, the number of
  records in the returned slice and the records themselves.

  If the first collection id is the root id the whole mirror is searched.
  Otherwise the search is run once per collection and restricted to
  records that have an IsPartOf equal to that collection id.

  Fix: the collection id is inserted into a single-quoted XPath string
  literal. Apostrophes in it are now doubled, so an id containing an
  apostrophe no longer breaks or alters the evaluated expression.

  @param $cql-query   URL-encoded path expression, appended to the mirror
  @param $collections collection ids to restrict the search to
  @param $format      requested response format
  @param $start-item  position of the first record to return
  @param $end-item    passed to fn:subsequence as its length argument
  @return the result of cmd-model:serialise-as
:)
declare function cmd-model:search-retrieve($cql-query as xs:string, $collections as xs:string+, $format as xs:string, $start-item as xs:integer, $end-item as xs:integer) as item()* {
  (: NOTE: $collection is referenced by name inside the util:eval strings
     below, so it must keep this exact name. :)
  let $collection := collection($cmd-model:cmdiMirrorPath)
  (: SECURITY NOTE(review): the decoded query is evaluated as code by
     util:eval. Callers must not pass untrusted input without validation. :)
  let $query := xdb:decode($cql-query)
  let $results :=
    if ($collections[1] eq $cmd-model:collectionRoot) then
      util:eval(fn:concat("$collection", $query, "/ancestor::CMD"))
    else
      for $coll in $collections
      (: Double every apostrophe so the id stays inside the string literal. :)
      let $coll-literal := fn:replace(xdb:decode($coll), "'", "''")
      return util:eval(fn:concat("$collection", $query, "/ancestor::CMD[descendant::IsPartOf = '", $coll-literal, "']"))
  let $result-count := fn:count($results)
  (: NOTE(review): fn:subsequence takes a length as its third argument, so
     $end-item acts as "maximum number of records", not as an end position.
     Confirm which of the two the callers intend. :)
  let $result-seq := fn:subsequence($results, $start-item, $end-item)
  let $seq-count := fn:count($result-seq)
  let $result-fragment :=
    <searchRetrieveResponse>
      <numberOfRecords>{$result-count}</numberOfRecords>
      <echoedSearchRetrieveRequest>{$cql-query, $collections, $start-item, $end-item}</echoedSearchRetrieveRequest>
      <diagnostics>{$seq-count}</diagnostics>
      <records>
        {$result-seq}
      </records>
    </searchRetrieveResponse>
  return
    cmd-model:serialise-as($result-fragment, $format)
};
353(:
354{cmdComponent}   //{cmdComponent}        Actor   //Actor
355{cmdPath}.      //{cmdPath}/{cmdComponent}      Actor.Contact.Phone     //Actor/Contact/Phone
356{cmdIndex} {rel} {term}         //{cmdIndex}[\. {rel} '{term}']         Actors.Actor.Sex=f      //Actors/Actor/Sex[.='f']
357{cmdIndex} any {term}   //{cmdIndex}[contains(. '{term}')]      Organisation.Name any University        //Organisation/Name[contains(.,'University')]
358and, or, and not        ?!      Organisation.Name any University and Actor.gender=m     ?!
359
360//MDGroup[Actors/Actor/Role[.='sponsor'] and Actors/Actor/Name[contains(.,'a')]]
361//Title[starts-with(.,'a')]
362//Title[starts-with(.,'A')]
363//Title[contains(.,'analysis')]
364http://demo.spraakdata.gu.se/clarin/cmd/model/stats?operation=searchRetrieve&query=//Title[contains(.,'analysis')]&collection=
365
366<record>
367  <recordSchema>info:srw/schema/1/dc-v1.1</recordSchema>
368  <recordPacking>xml</recordPacking>
369  <recordData>
370    <srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-v1.1">
371     <dc:title>This is a Sample Record</dc:title>
372    </srw_dc:dc>
373  </recordData>
374  <recordPosition>1</recordPosition>
375  <extraRecordData>
376    <rel:score xmlns:rel="info:srw/extensions/2/rel-1.0">
377      0.965
378    </rel:score>
379   </extraRecordData>
380</record>
381
382<searchRetrieveResponse>
383        <numberOfRecords>integer</numberOfRecords>
384        <echoedSearchRetrieveRequest>query itself (together with the context-collection) </echoedSearchRetrieveRequest>
385        <diagnostics>if necessary</diagnostics>
386        <records>
387                ....
388        </records>
389</searchRetrieveResponse>
390
391:)
Note: See TracBrowser for help on using the repository browser.