source: MDRepository/trunk/xquery/cmd-model.xqm @ 657

Last change on this file since 657 was 657, checked in by ljo, 14 years ago

cmd-model.xqm - added count of sub collections. All calculated collection docs go into the same collection.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 13.8 KB
Line 
1module namespace cmd-model = "http://spraakbanken.gu.se/clarin/xquery/model";
2
3(:
4 $Id: cmd-model.xqm 657 2010-08-26 13:39:21Z ljo $
5:)
6
7import module namespace xdb="http://exist-db.org/xquery/xmldb";
8import module namespace util="http://exist-db.org/xquery/util";
9
(: Module-level constants. The numeric prefix fused to each declaration is
   browser line numbering carried over from the source viewer. :)
(: XML:DB URI passed as the first argument to xdb:login before documents are stored. :)
10declare variable $cmd-model:cmdiDatabaseURI as xs:string := "xmldb:exist:///db";
11
(: Collection into which cmd-model:store-result writes documents computed for more
   than one collection, and where cmd-model:get-collections keeps its listing. :)
12declare variable $cmd-model:commonFreqsPath as xs:string := "/db/common/clarin/freqs";
(: Collection searched for MdSelfLink values by
   cmd-model:get-resource-by-handle-or-collection-path. :)
13declare variable $cmd-model:cmdiMirrorPath as xs:string := "/db/cmdi-mirror";
14
(: Operation names; they correspond to the API functions of this module
   (get-collections, query-model, search-retrieve). Presumably compared against a
   request parameter by a caller outside this file; confirm. :)
15declare variable $cmd-model:getCollections as xs:string := "getCollections";
16declare variable $cmd-model:queryModel as xs:string := "queryModel";
17declare variable $cmd-model:searchRetrieve as xs:string := "searchRetrieve";
18
(: Relative element paths for actor data; not referenced by the functions in this module. :)
19declare variable $cmd-model:typeActorPath as xs:string := "MDGroup/Actors/Actor";
20declare variable $cmd-model:typeActorPath0 as xs:string := "Actor";
21declare variable $cmd-model:typeActorRolePath as xs:string := "MDGroup/Actors/Actor/Role";
22
(: docTypeTerms is the root element name returned by cmd-model:get-doc-type-element-name.
   docTypeSuffix is not referenced by the functions in this module. :)
23declare variable $cmd-model:docTypeTerms as xs:string := "Terms";
24declare variable $cmd-model:docTypeSuffix as xs:string := "Values";
25
(: Response format identifiers; cmd-model:serialise-as only tests for the JSON one. :)
26declare variable $cmd-model:responseFormatXml as xs:string := "xml";
27declare variable $cmd-model:responseFormatJSon as xs:string := "json";
28declare variable $cmd-model:responseFormatText as xs:string := "text";
29
(: Not referenced by the functions in this module. :)
30declare variable $cmd-model:collectionDocName as xs:string := "collection.xml";
31
(: File extension appended to the names of stored result documents. :)
32declare variable $cmd-model:xmlExt as xs:string := ".xml";
33
(:~
  Builds a Term element summarising all nodes matched by //$path in $collection.

  The element carries the path, the name of the last path step, the number of
  matched nodes, the number of descendant text nodes and the number of distinct
  text values. While $depth is greater than zero one nested Term is produced for
  every distinct child element name; at depth zero the content is the text
  'maxdepth'.

  @param $collection database collection to scan
  @param $path relative XPath, without the leading "//", evaluated with util:eval
  @param $depth number of child levels still to be expanded
  @return a Term element, possibly containing nested Term elements
:)
declare function cmd-model:elem($collection as xs:string, $path as xs:string, $depth as xs:integer) as element() {
  (: A doubled apostrophe is the escape for an apostrophe inside a '...' string
     literal, so a collection name containing one no longer breaks the query. :)
  let $collection-literal := fn:replace($collection, "'", "''")
  (: NOTE(review): $path is evaluated as an XPath expression and must therefore
     come from a trusted source. :)
  let $path-nodes := util:eval(fn:concat("collection('", $collection-literal, "')//", $path))
  let $path-count := count($path-nodes)

  let $subs := distinct-values($path-nodes/child::element()/name())
  let $text-nodes := $path-nodes//text()
  let $text-count := count($text-nodes)
  let $text-count-distinct := count(distinct-values($text-nodes))
  (: The pattern needs a "/" in front of the last step; a single-step path such
     as "Actor" does not match, so the path itself is used as the name. :)
  let $term-name := (text:groups($path, "/([^/]+)$")[last()], $path)[1]
  return
        <Term path="{fn:concat("//", $path)}" name="{$term-name}" count="{$path-count}" count_text="{$text-count}"  count_distinct_text="{$text-count-distinct}">{
          if ($depth > 0) then
            for $elname in $subs[. != '']
            return
              cmd-model:elem($collection, concat($path, '/', $elname), $depth - 1)
          else 'maxdepth'
        }</Term>
};
51
(:~
  Produces one Term element per item of $n.

  The name attribute holds the node name; the content is the XPath reported by
  util:node-xpath for the parent element, or nothing when the node has no
  parent element.

  @param $n nodes to describe
  @return a sequence of Term elements, one per input node
:)
declare function cmd-model:paths($n) {
  for $node in $n
  let $parent-xpath := $node/parent::element()/util:node-xpath(.)
  return
    <Term name="{$node/name()}"> {$parent-xpath}</Term>
};
59
(:~
  Creates a model document for $collection and for every collection nested
  below it, using cmd-model:create-doc.

  @param $collection path of the collection to start from
  @param $type-name path expression handed on to cmd-model:create-doc
  @param $depth depth handed on to cmd-model:create-doc
  @return the result of create-doc for $collection, followed by the results for
          its descendant collections
:)
declare function cmd-model:recurse-collections-model($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
    (: Descendants are processed before the collection itself; the collection's
       own result is nevertheless returned first. :)
    let $descendant-results :=
      for $sub-collection in xdb:get-child-collections($collection)
      let $sub-path := fn:concat($collection, '/', xs:string($sub-collection))
      return
        cmd-model:recurse-collections-model($sub-path, $type-name, $depth)
    let $own-result := cmd-model:create-doc($collection, $type-name, $depth)
    return
      ($own-result, $descendant-results)
};
76
(:~
  Builds a nested tree of c elements by following the ResourceProxy entries of
  type "Metadata" found in $collection.

  Every c element carries the name (n), handle and proxy-id it was called with.
  A node with followed children reports the sum of its children's cnt values and
  the number of children (sub-colls). A node without children, or reached with
  $depth equal to 0, reports as cnt the number of CMD elements in documents
  without any "Metadata" ResourceType, taken from the collection addressed by
  the node's own collection name followed by "/../".

  @param $collection CMD element or document node to start from
  @param $parent value for the n attribute
  @param $handle value for the handle attribute
  @param $proxy-id value for the proxy-id attribute
  @param $depth number of levels still to follow; 0 stops the recursion
  @return a c element
:)
declare function cmd-model:recurse-collections($collection as node()+, $parent as xs:string, $handle as xs:string, $proxy-id as xs:string, $depth as xs:integer) as item()* {
    let $children := if ($depth eq 0) then () else $collection//ResourceProxy[ResourceType = "Metadata"]
    return
      if (fn:exists($children)) then
          let $child-results :=
            for $child in $children
              (: Proxies flagged with an unresolvable-uri child are not followed. :)
              let $child-doc := if (empty($child/unresolvable-uri)) then doc(concat(util:collection-name($child/root()), "/", $child/ResourceRef)) else (),
                $child-name := ($child-doc//Corpus/Name, $child-doc//Session/Name, "UNKNOWN")[1]
            return
              (: Proxies whose target document cannot be loaded are skipped. :)
              if (empty($child-doc)) then ()
              else
                (: fn:string() of the first match keeps a missing or repeated
                   MdSelfLink, or a missing id attribute, from raising a type
                   error against the xs:string parameters. :)
                cmd-model:recurse-collections($child-doc, $child-name, fn:string(($child-doc//Header/MdSelfLink)[1]), fn:string($child/@id), $depth - 1)
          return
          (: $child-results are the child c elements themselves, so their own
             cnt attributes are summed, not those of their c children. :)
          <c n="{$parent}" handle="{$handle}" proxy-id="{$proxy-id}" cnt="{sum($child-results/@cnt)}" sub-colls="{count($child-results)}" >{$child-results}</c>
      else
      (: fn:root() resolves the owning document both when a CMD element and when
         a document node was passed in. :)
      <c n="{$parent}" handle="{$handle}" proxy-id="{$proxy-id}" cnt="{count(collection(concat(util:collection-name(fn:root($collection[1])), "/../"))//CMD[not(//ResourceType[. = "Metadata"])])}"></c>
};
97
(:~
  Computes the Term model for $type-name in $collection and stores it.

  The model is always recomputed; no check for newer data is made.

  @param $collection collection to analyse and to store the result in
  @param $type-name path expression passed to cmd-model:elem
  @param $depth depth passed to cmd-model:elem and recorded with the result
  @return whatever cmd-model:store-result returns
:)
declare function cmd-model:create-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as xs:string* {
  let $model := cmd-model:elem($collection, $type-name, $depth)
  return
    cmd-model:store-result($collection, $model, $type-name, $depth)
};
106
(:~
  Returns the stored model document for $type-name in $collection, creating it
  first when it is not available.

  The document name is the last step of $type-name (or $type-name itself when it
  contains no "/" followed by word characters), followed by the depth and the
  XML extension.

  NOTE(review): cmd-model:create-doc receives the full $type-name; verify that
  the name it stores under matches the name fetched here.

  @param $collection collection holding the result document
  @param $type-name path expression the model is built for
  @param $depth depth of the model
  @return the result document, or the empty sequence if it cannot be loaded
:)
declare function cmd-model:get-result-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
  let $last-step := text:groups($type-name, "/(\w+)$")[last()]
  let $base-name := if (fn:exists($last-step)) then $last-step else $type-name
  let $stem := fn:concat($base-name, xs:string($depth))
  (: Bound only for its side effect: the document is built when missing. :)
  let $ensure-stored :=
    if (fn:not(cmd-model:is-result-available($collection, fn:concat("/", $stem)))) then
      cmd-model:create-doc($collection, $type-name, $depth)
    else
      ()
  return
    fn:doc(fn:concat($collection, "/", $stem, $cmd-model:xmlExt))
};
120
(:~
  Loads the document $doc-name from $collection.

  @param $collection collection path
  @param $doc-name name of the document inside the collection
  @return the result of fn:doc for "$collection/$doc-name"
:)
declare function cmd-model:get-doc($collection as xs:string, $doc-name as xs:string) as item()* {
  let $doc-uri := fn:string-join(($collection, $doc-name), "/")
  return
    fn:doc($doc-uri)
};
127
128
(:~
  Tells whether a result document is already available.

  @param $collection collection path
  @param $result-ref reference appended directly to the collection path; the
         caller supplies the leading "/" and omits the extension
  @return true if the document can be loaded
:)
declare function cmd-model:is-result-available($collection as xs:string, $result-ref as xs:string) as xs:boolean {
  let $result-uri := fn:concat($collection, $result-ref, $cmd-model:xmlExt)
  return
    fn:doc-available($result-uri)
};
135
(:~
  Tells whether the document $doc-name is available in $collection.

  @param $collection collection path
  @param $doc-name document name, including its extension
  @return true if "$collection/$doc-name" can be loaded
:)
declare function cmd-model:is-doc-available($collection as xs:string, $doc-name as xs:string) as xs:boolean {
  let $doc-uri := fn:string-join(($collection, $doc-name), "/")
  return
    fn:doc-available($doc-uri)
};
142
(:~
  Stores the calculated frequencies for reuse.

  When more than one collection is given the result is written to the common
  collection ($cmd-model:commonFreqsPath) under a compound, hash-based name.
  With a single collection it is written into that collection, named after the
  last step of $type-name followed by the depth. This is the name
  cmd-model:get-result-doc looks the document up under.

  Logs in with the credentials read from /db/clarin/writer.xml before storing.

  @param $coll-names one or more collection paths the result was computed for
  @param $entries elements to place inside the result document
  @param $type-name path expression the result was computed for
  @param $depth depth of the computed model
  @return the value returned by xdb:store
:)
declare function cmd-model:store-result($coll-names as xs:string+, $entries as element()*, $type-name as xs:string, $depth as xs:integer) as xs:string {
  let $clarin-writer := fn:doc("/db/clarin/writer.xml"),
    $dummy := xdb:login($cmd-model:cmdiDatabaseURI, $clarin-writer//write-user/text(), $clarin-writer//write-user-cred/text()),
    (: The naming helpers declare their depth parameter as xs:string; an
       xs:integer is not converted implicitly, so it is cast once here. :)
    $depth-string := xs:string($depth)
    return

      if (fn:exists($coll-names[2])) then
        (: More than one collection is concerned. :)
        xdb:store($cmd-model:commonFreqsPath, cmd-model:make-compound-doc-name($coll-names, $type-name, $depth-string), cmd-model:make-doc-element-of-type($type-name, $coll-names, $entries, $depth-string))
      else
        (: Only one collection is concerned. :)
        let $name-last := text:groups($type-name, "/(\w+)$")[last()],
          (: Same derivation as in cmd-model:get-result-doc, so the stored name
             and the name looked up there agree for paths containing "/". :)
          $doc-type := if (fn:empty($name-last)) then $type-name else $name-last,
          $logged := util:log('debug', fn:concat('Stores ', $type-name, ' in ', $coll-names))
        return xdb:store($coll-names, cmd-model:make-doc-name($coll-names, $doc-type, $depth-string, fn:false()), cmd-model:make-doc-element-of-type($type-name, (), $entries, $depth-string))
};
161
(:~
  Stores the collection listing in the given collection.

  Logs in with the credentials read from /db/clarin/writer.xml, then writes
  $data as document $doc-name into $collection-path. Errors raised by xdb:store
  are not caught.

  @param $data node to store
  @param $collection-path target collection
  @param $doc-name name of the document to create
  @return the value returned by xdb:store
:)
declare function cmd-model:store-collection-data($data as node(), $collection-path as xs:string, $doc-name as xs:string) as xs:string? {
  let $writer-config := fn:doc("/db/clarin/writer.xml")
  let $user := $writer-config//write-user/text()
  let $credential := $writer-config//write-user-cred/text()
  (: Bound only for its side effect of authenticating the session. :)
  let $logged-in := xdb:login($cmd-model:cmdiDatabaseURI, $user, $credential)
  return
    xdb:store($collection-path, $doc-name, $data)
};
171
(:~
  Creates the document name for a type, with or without the collection path.

  @param $coll-name collection path, used only when $incl-path is true
  @param $type-name type name forming the start of the document name
  @param $depth depth, appended to the type name
  @param $incl-path whether to prefix the name with "$coll-name/"
  @return "$type-name$depth.xml", optionally preceded by the collection path
:)
declare function cmd-model:make-doc-name($coll-name as xs:string?, $type-name as xs:string, $depth as xs:string, $incl-path as xs:boolean) as xs:string {
  let $prefix := if ($incl-path) then fn:concat($coll-name, "/") else ""
  return
    fn:concat($prefix, $type-name, $depth, $cmd-model:xmlExt)
};
183
(:~
  Creates a reusable document name for a selection of collections.

  The name is "$type-name$depth-" followed by the MD5 hash of the collection
  names joined without a separator, followed by the XML extension.

  @param $coll-names collection names entering the hash, in the given order
  @param $type-name type name forming the start of the document name
  @param $depth depth, appended to the type name
  @return the compound document name
:)
declare function cmd-model:make-compound-doc-name($coll-names as xs:string+, $type-name as xs:string, $depth as xs:string) as xs:string {
  let $digest := util:hash(string-join($coll-names, ""), "MD5")
  return
    fn:concat($type-name, $depth, "-", $digest, $cmd-model:xmlExt)
};
192
(:~
  Creates an element of the given type.

  @param $type-name name of the element to create
  @param $count value of the count attribute
  @param $text-count value of the text-count attribute
  @param $text-types-count value of the text-types-count attribute
  @param $value text content of the element
  @return the constructed element
:)
declare function cmd-model:make-element-of-type($type-name as xs:string, $count as xs:string, $text-count as xs:string, $text-types-count as xs:string, $value as xs:string) as element() {
  element {$type-name} {
      (: The attributes take their values from the function's own parameters. :)
      attribute count {$count},
      attribute text-count {$text-count},
      attribute text-types-count {$text-types-count},
      text {$value}
  }
};
205
(:~
  Creates a document element of the given type.

  The element name comes from cmd-model:get-doc-type-element-name. It carries a
  depth attribute, a colls attribute with the comma-separated collection names
  (omitted when none are given) and a created attribute with the current
  date and time, followed by $entries.

  @param $type-name type name used to look up the element name
  @param $coll-names collection names to record, may be empty
  @param $entries content of the document element
  @param $depth value of the depth attribute
  @return the constructed document element
:)
declare function cmd-model:make-doc-element-of-type($type-name as xs:string, $coll-names as xs:string*, $entries as element()*, $depth as xs:string) as element() {
  element {cmd-model:get-doc-type-element-name($type-name)} {
    attribute depth {$depth},
    if (fn:exists($coll-names)) then
      attribute colls {fn:string-join($coll-names, ",")}
    else
      (),
    attribute created {fn:current-dateTime()},
    $entries
  }
};
220
(:~
  Returns the element name for a document of the given type.

  The same name, $cmd-model:docTypeTerms, is returned for every type.

  @param $type-name type name; not consulted
  @return the document element name
:)
declare function cmd-model:get-doc-type-element-name($type-name as xs:string) as xs:string {
  let $element-name := $cmd-model:docTypeTerms
  return
    $element-name
};
227
228
(:~
  Gets the resource by handle or by collection path.

  An $id starting with "test-" is decoded and matched against the MdSelfLink
  values below $cmd-model:cmdiMirrorPath. Any other $id is used as a collection
  path, in which the records whose IsPartOf equals "root" are selected.

  FIXME: URL-decode $id once the client passes ids encoded.

  @param $id handle or collection path
  @return the CMD ancestors of the matching elements
:)
declare function cmd-model:get-resource-by-handle-or-collection-path($id as xs:string) as node()* {
  let $is-handle := starts-with($id, "test-")
  return
    if (not($is-handle)) then
      collection($id)//IsPartOf[. = "root"]/ancestor::CMD
    else
      let $self-link := xdb:decode($id)
      return
        collection($cmd-model:cmdiMirrorPath)//MdSelfLink[. = $self-link]/ancestor::CMD
};
239
(:~
  Selects the serialisation format.

  For the JSON format the exist:serialize option is set to the text method with
  media type application/json. The item itself is returned unchanged in every
  case; no conversion to JSON is performed here.

  @param $item node to return
  @param $format requested response format
  @return $item
:)
declare function cmd-model:serialise-as($item as node()?, $format as xs:string) as item()? {
  if ($format ne $cmd-model:responseFormatJSon) then
    (: xml and text formats need no serialisation option :)
    $item
  else
    let $serialisation := util:declare-option("exist:serialize", "method=text media-type=application/json")
    return
      $item
};
251
252
(:~
  API function queryModel.

  Fetches the model document for $cmd-index-path (creating it when needed) and
  returns it in the requested format.

  NOTE(review): $collection is declared as xs:string+ but is passed to
  cmd-model:get-result-doc, which accepts exactly one xs:string.

  @param $cmd-index-path path expression the model is built for
  @param $collection collection to query
  @param $format requested response format
  @param $max-depth depth of the model
  @return the model document
:)
declare function cmd-model:query-model($cmd-index-path as xs:string, $collection as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
  let $model-doc := cmd-model:get-result-doc($collection, $cmd-index-path, $max-depth)
  return
    cmd-model:serialise-as($model-doc, $format)
};
259
(:~
  API function getCollections.

  Returns the collection listing for $collections. The listing is kept in
  $cmd-model:commonFreqsPath under a compound name derived from the collection
  names and the depth; it is computed with cmd-model:recurse-collections and
  stored only when no such document is available yet.

  @param $collections handles or collection paths to list
  @param $format requested response format
  @param $max-depth maximum recursion depth
  @return the stored Collections document
:)
declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
  (: make-compound-doc-name declares $depth as xs:string; an xs:integer is not
     converted implicitly, hence the explicit cast. :)
  let $names := ($cmd-model:commonFreqsPath, cmd-model:make-compound-doc-name($collections, "collection", xs:string($max-depth))),
    (: Bound only for its side effect: the listing is built when missing. :)
    $dummy :=
      if (cmd-model:is-doc-available($names[1], $names[2])) then
        ()
      else
        let $children :=
          for $collection-item in $collections
          return
            for $collection-doc in cmd-model:get-resource-by-handle-or-collection-path($collection-item)
            return
              (: fn:string() of the first match keeps a record with no
                 MdSelfLink, or with several, from raising a type error
                 against the xs:string $handle parameter. :)
              cmd-model:recurse-collections($collection-doc, ($collection-doc//Corpus/Name, $collection-doc//Session/Name, "UNKNOWN")[1], fn:string(($collection-doc//MdSelfLink)[1]), "", $max-depth)
        return
          cmd-model:store-collection-data(<Collections count="{count($children/@cnt)}" root="{$collections}">{$children}</Collections>, $names[1], $names[2])
  return
    cmd-model:serialise-as(cmd-model:get-doc($names[1], $names[2]), $format)
};
279
(:~
  API function searchRetrieve.

  Evaluates the decoded query against every given collection, collects the CMD
  ancestors of the matches and wraps a slice of them in a
  searchRetrieveResponse element.

  NOTE(review): the decoded query and collection names are evaluated with
  util:eval; if they come straight from a request this executes arbitrary
  XQuery.
  NOTE(review): $end-item is passed as the third argument of fn:subsequence,
  which is a length, not an end position. Confirm what callers supply.

  @param $cql-query encoded path expression appended to each collection() call
  @param $collection encoded collection paths to search
  @param $format requested response format
  @param $start-item position of the first record to return
  @param $end-item third argument of fn:subsequence
  @return the searchRetrieveResponse element
:)
declare function cmd-model:search-retrieve($cql-query as xs:string, $collection as xs:string+, $format as xs:string, $start-item as xs:integer, $end-item as xs:integer) as item()* {
  let $decoded-query := xdb:decode($cql-query)
  let $hits :=
    for $coll in $collection
    let $expression := fn:concat("collection('", xdb:decode($coll), "')", $decoded-query, "/ancestor::CMD")
    return
      util:eval($expression)
  let $hit-count := fn:count($hits)
  let $page := fn:subsequence($hits, $start-item, $end-item)
  let $page-size := fn:count($page)
  let $response :=
    <searchRetrieveResponse>
      <numberOfRecords>{$hit-count}</numberOfRecords>
      <echoedSearchRetrieveRequest>{$cql-query, $collection, $start-item, $end-item}</echoedSearchRetrieveRequest>
      <diagnostics>{$page-size}</diagnostics>
      <records>
        {$page}
      </records>
    </searchRetrieveResponse>
  return
    cmd-model:serialise-as($response, $format)
};
302(:
303{cmdComponent}   //{cmdComponent}        Actor   //Actor
304{cmdPath}.      //{cmdPath}/{cmdComponent}      Actor.Contact.Phone     //Actor/Contact/Phone
305{cmdIndex} {rel} {term}         //{cmdIndex}[\. {rel} '{term}']         Actors.Actor.Sex=f      //Actors/Actor/Sex[.='f']
306{cmdIndex} any {term}   //{cmdIndex}[contains(.,'{term}')]      Organisation.Name any University        //Organisation/Name[contains(.,'University')]
307and, or, and not        ?!      Organisation.Name any University and Actor.gender=m     ?!
308
309//MDGroup[Actors/Actor/Role[.='sponsor'] and Actors/Actor/Name[contains(.,'a')]]
310//Title[starts-with(.,'a')]
311//Title[starts-with(.,'A')]
312//Title[contains(.,'analysis')]
313http://demo.spraakdata.gu.se/clarin/cmd/model/stats?operation=searchRetrieve&query=//Title[contains(.,'analysis')]&collection=
314
315<record>
316  <recordSchema>info:srw/schema/1/dc-v1.1</recordSchema>
317  <recordPacking>xml</recordPacking>
318  <recordData>
319    <srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-v1.1">
320     <dc:title>This is a Sample Record</dc:title>
321    </srw_dc:dc>
322  </recordData>
323  <recordPosition>1</recordPosition>
324  <extraRecordData>
325    <rel:score xmlns:rel="info:srw/extensions/2/rel-1.0">
326      0.965
327    </rel:score>
328   </extraRecordData>
329</record>
330
331<searchRetrieveResponse>
332        <numberOfRecords>integer</numberOfRecords>
333        <echoedSearchRetrieveRequest>query itself (together with the context-collection) </echoedSearchRetrieveRequest>
334        <diagnostics>if necessary</diagnostics>
335        <records>
336                ....
337        </records>
338</searchRetrieveResponse>
339
340:)
Note: See TracBrowser for help on using the repository browser.