source: MDRepository/trunk/xquery/cmd-model.xqm @ 802

Last change on this file since 802 was 802, checked in by ljo, 14 years ago

cmd-model.xqm - move som more lookups to ft:query().

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.1 KB
Line 
1module namespace cmd-model = "http://spraakbanken.gu.se/clarin/xquery/model";
2
3(:
4 $Id: cmd-model.xqm 802 2010-10-18 14:59:43Z ljo $
5:)
6
7import module namespace xdb="http://exist-db.org/xquery/xmldb";
8import module namespace util="http://exist-db.org/xquery/util";
9
10declare variable $cmd-model:cmdiDatabaseURI as xs:string := "xmldb:exist:///db";
11
12declare variable $cmd-model:commonFreqsPath as xs:string := "/db/common/clarin/freqs";
13declare variable $cmd-model:cmdiMirrorPath as xs:string := "/db/cmdi-mirror";
14
15declare variable $cmd-model:getCollections as xs:string := "getCollections";
16declare variable $cmd-model:queryModel as xs:string := "queryModel";
17declare variable $cmd-model:searchRetrieve as xs:string := "searchRetrieve";
18
19declare variable $cmd-model:typeActorPath as xs:string := "MDGroup/Actors/Actor";
20declare variable $cmd-model:typeActorPath0 as xs:string := "Actor";
21declare variable $cmd-model:typeActorRolePath as xs:string := "MDGroup/Actors/Actor/Role";
22
23declare variable $cmd-model:docTypeTerms as xs:string := "Terms";
24declare variable $cmd-model:docTypeSuffix as xs:string := "Values";
25
26declare variable $cmd-model:responseFormatXml as xs:string := "xml";
27declare variable $cmd-model:responseFormatJSon as xs:string := "json";
28declare variable $cmd-model:responseFormatText as xs:string := "text";
29
30declare variable $cmd-model:collectionDocName as xs:string := "collection.xml";
31
32declare variable $cmd-model:collectionRoot as xs:string := "root";
33
34declare variable $cmd-model:xmlExt as xs:string := ".xml";
35
36declare variable $cmd-model:valuesLimit as xs:integer := 100;
37
38
(:~
  API function queryModel.

  Fetches the result document for the given index path and collections
  via cmd-model:get-result-doc() and hands it to cmd-model:serialise-as().

  @param $cmd-index-path path of the element (term) to summarise
  @param $collection one or more collection handles
  @param $format requested response format
  @param $max-depth depth passed on to cmd-model:get-result-doc()
  @return whatever cmd-model:serialise-as() yields for the result document
:)
declare function cmd-model:query-model($cmd-index-path as xs:string, $collection as xs:string+, $format as xs:string, $max-depth as xs:integer) as item()? {
  let $result-doc := cmd-model:get-result-doc($collection, $cmd-index-path, $max-depth)
  return
    cmd-model:serialise-as($result-doc, $format)
};
45
(:~
  API function getCollections.

  Returns the collection listing for the requested collections in the
  requested format. The listing lives in a document below
  $cmd-model:commonFreqsPath whose name is derived from the collection
  handles and the depth; it is only computed and stored when that
  document is not available yet.

  @param $collections handles of the collections to list
  @param $format requested response format
  @param $max-depth depth passed on to cmd-model:recurse-collections(); also part of the document name
  @return the (possibly freshly stored) listing, passed through cmd-model:serialise-as()
:)
declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
  let $cache-path := $cmd-model:commonFreqsPath
  let $cache-doc := cmd-model:make-compound-doc-name($collections, "collection", xs:string($max-depth))
  (: bound only for its side effect: computes and stores the listing on a miss :)
  let $ensure-cached :=
    if (fn:not(cmd-model:is-doc-available($cache-path, $cache-doc))) then
      let $children :=
        for $collection-item in $collections,
            $collection-doc in cmd-model:get-resource-by-handle($collection-item)
        return
          cmd-model:recurse-collections($collection-doc, cmd-model:get-md-collection-name($collection-doc), $collection-doc//MdSelfLink, "", $max-depth)
      let $res-count := sum($children/@cnt)
      let $coll-count := sum($children/@cnt_subcolls) + count($children)
      return
        cmd-model:store-collection-data(
          <Collections cnt="{$res-count}" cnt_subcolls="{$coll-count}" cnt_children="{count($children)}" root="{$collections}">{$children}</Collections>,
          $cache-path,
          $cache-doc
        )
    else
      ()
  return
    cmd-model:serialise-as(cmd-model:get-doc($cache-path, $cache-doc), $format)
};
67
(:~
  API function searchRetrieve.

  Evaluates the (URL-decoded) query fragment against the CMDI mirror
  collection and returns the enclosing CMD records wrapped in a
  searchRetrieveResponse element, passed through cmd-model:serialise-as().

  If the first requested collection is $cmd-model:collectionRoot the whole
  mirror is searched; otherwise the hits are restricted, per requested
  collection, to CMD records having a matching IsPartOf descendant.

  SECURITY: $cql-query is an XPath fragment that is evaluated verbatim with
  util:eval(). That is how this function is designed to work, but it means
  the caller must only pass trusted/validated queries.
  The collection handle, on the other hand, is no longer spliced into the
  evaluated text (a handle containing a quote used to break or alter the
  expression); it is bound to a variable that the evaluated expression
  references.

  NOTE(review): the third argument of fn:subsequence() is a length, not an
  end position, so $end-item effectively acts as the maximum number of
  records returned. Behaviour is left unchanged - confirm against callers.

  @param $cql-query URL-encoded XPath fragment appended to the mirror collection
  @param $collections collection handles restricting the search
  @param $format requested response format
  @param $start-item position of the first record to return
  @param $end-item passed as length to fn:subsequence() (see note above)
:)
declare function cmd-model:search-retrieve($cql-query as xs:string, $collections as xs:string+, $format as xs:string, $start-item as xs:integer, $end-item as xs:integer) as item()* {
  let $collection := collection($cmd-model:cmdiMirrorPath)
  let $decoded-query := xdb:decode($cql-query)
  let $results :=
    if ($collections[1] eq $cmd-model:collectionRoot) then
      util:eval(fn:concat("$collection", $decoded-query, "/ancestor::CMD"))
    else
      for $coll in $collections
      (: referenced by name from the evaluated expression below :)
      let $decoded-coll := xdb:decode($coll)
      return
        util:eval(fn:concat("$collection", $decoded-query, "/ancestor::CMD[descendant::IsPartOf = $decoded-coll]"))
  let $result-count := fn:count($results)
  let $result-seq := fn:subsequence($results, $start-item, $end-item)
  let $seq-count := fn:count($result-seq)
  let $result-fragment :=
    <searchRetrieveResponse>
      <numberOfRecords>{$result-count}</numberOfRecords>
      <echoedSearchRetrieveRequest>{$cql-query, $collections, $start-item, $end-item}</echoedSearchRetrieveRequest>
      <diagnostics>{$seq-count}</diagnostics>
      <records>
        {$result-seq}
      </records>
    </searchRetrieveResponse>
  return
    cmd-model:serialise-as($result-fragment, $format)
};
96
97(: **********************
98        queryModel - subfunctions
99:)
100
101
(:
  Select all nodes matching $path in the CMDI mirror (optionally restricted
  to records that are part of the given collections) and summarise them
  with cmd-model:elem-r().

  If the first collection is $cmd-model:collectionRoot the whole mirror is
  used; otherwise each collection handle is looked up through a full-text
  query on IsPartOf.

  The collection handle is passed to ft:query() as a constructed query
  element bound to a variable, instead of being spliced into the evaluated
  text, so markup characters or braces in a handle cannot alter the
  expression.

  SECURITY: $path is still appended to the evaluated expression because the
  step to select is only known at run time; callers must pass trusted paths.

  @param $collections collection handles (or $cmd-model:collectionRoot first)
  @param $path relative element path, e.g. "MDGroup/Actors/Actor"
  @param $depth used both as maximum and as starting depth for cmd-model:elem-r()
:)
declare function cmd-model:elem($collections as xs:string+, $path as xs:string, $depth as xs:integer) as element() {
  let $collection := collection($cmd-model:cmdiMirrorPath)
  let $path-nodes :=
    if ($collections[1] eq $cmd-model:collectionRoot) then
      util:eval(fn:concat("$collection/descendant-or-self::", $path))
    else
      for $coll in $collections
      (: referenced by name from the evaluated expression below :)
      let $term-query := <query><term>{xdb:decode($coll)}</term></query>
      return
        util:eval(fn:concat("$collection/ft:query(descendant::IsPartOf, $term-query)/ancestor-or-self::CMD/descendant-or-self::", $path))
  return
    cmd-model:elem-r($path-nodes, $path, $depth, $depth)
};
114
(:
  Build a Term element summarising $path-nodes: number of nodes, number of
  text nodes of the nodes without child elements, and number of distinct
  text values. While $depth is positive it recurses into every distinct
  child element name; when $max-depth is 1 and there is text, the value
  list from cmd-model:values() is appended. At depth 0 the Term only
  contains the text 'maxdepth'.

  The name attribute is the last path step (or the whole path when it
  contains no slash).

  Note: the evaluated expression refers to the parameter $path-nodes by name.
:)
declare function cmd-model:elem-r($path-nodes as node()*, $path as xs:string, $max-depth as xs:integer, $depth as xs:integer) as element() {
  let $node-count := count($path-nodes)
  let $children := $path-nodes/child::element()
  let $child-names := distinct-values($children/name())
  (: only when no node has child elements are the nodes treated as terminals :)
  let $terminals := if (empty($children)) then $path-nodes else ()
  let $texts := $terminals/text()
  let $text-total := count($texts)
  let $text-distinct := count(distinct-values($texts))
  let $term-name := (text:groups($path, "/([^/]+)$")[last()], $path)[1]
  let $content :=
    if ($depth > 0) then
      (
        for $elname in $child-names[. != '']
        return
          cmd-model:elem-r(util:eval(concat("$path-nodes/", $elname)), concat($path, '/', $elname), $max-depth, $depth - 1),
        if ($max-depth eq 1 and $text-total gt 0) then cmd-model:values($path-nodes) else ()
      )
    else
      'maxdepth'
  return
    <Term path="{fn:concat("//", $path)}" name="{$term-name}" count="{$node-count}" count_text="{$text-total}"  count_distinct_text="{$text-distinct}">{
      $content
    }</Term>
};
134
(:
  List the distinct text values of $nodes as <v key=".." cnt=".."/>
  elements, ordered case-insensitively by key. cnt is the number of nodes
  whose value equals the key.

  At most $cmd-model:valuesLimit entries are returned; a limit of 0 means
  "no limit". The keys are sorted and truncated first, so occurrence counts
  are only computed for the entries that are actually returned.
:)
declare function cmd-model:values($nodes as node()*) as node()* {
  let $sorted-keys :=
    for $key in distinct-values($nodes/text())
    order by lower-case($key) ascending
    return $key
  let $emitted-keys :=
    if ($cmd-model:valuesLimit eq 0) then
      $sorted-keys
    else
      subsequence($sorted-keys, 1, $cmd-model:valuesLimit)
  for $key in $emitted-keys
  return
    <v key="{$key}" cnt="{count($nodes[. eq $key])}" />
};
146
(:
  For every node in $n emit a Term element named after the node and
  containing the XPath (util:node-xpath) of its parent element, if any.
:)
declare function cmd-model:paths($n) {
  for $el in $n
  let $parent-paths :=
    for $anc in $el/parent::element()
    return util:node-xpath($anc)
  return
    <Term name="{$el/name()}"> {
      $parent-paths
    }</Term>
};
154
(:
  Possibly obsolete (marked "OBSOLETE??" by the original author).

  Walks the database collection tree below $collection. Child collections
  are processed first, then a result document is created for $collection
  itself via cmd-model:create-doc(). The returned sequence lists the result
  for $collection first, followed by the results of its descendants.
:)
declare function cmd-model:recurse-collections-model($collection as xs:string, $type-name as xs:string, $depth as xs:integer, $name as xs:string) as item()* {
  let $descendant-results :=
    for $child in xdb:get-child-collections($collection)
    return
      cmd-model:recurse-collections-model(fn:concat($collection, '/', xs:string($child)), $type-name, $depth, $name)
  let $own-result := cmd-model:create-doc($collection, $type-name, $depth, $name)
  return
    ($own-result, $descendant-results)
};
171
172
173(: **********************
174        getCollections - subfunctions
175:)
176
(:
  Get the MD resource (CMD record) by handle.

  An empty handle or $cmd-model:collectionRoot selects the records whose
  IsPartOf equals $cmd-model:collectionRoot. Any other handle is
  URL-decoded and looked up with a full-text query on MdSelfLink.

  The handle is handed to ft:query() as a constructed <term> element bound
  to a variable rather than being spliced into the evaluated text, so
  markup characters or braces in a handle cannot break or alter the
  expression. The expression text itself is now constant; util:eval() is
  kept so that the lookup is resolved at evaluation time exactly as before.

  @param $id URL-encoded handle, "" or $cmd-model:collectionRoot
  @return the matching CMD elements
:)
declare function cmd-model:get-resource-by-handle($id as xs:string) as node()* {
  let $collection := collection($cmd-model:cmdiMirrorPath)
  return
    if ($id eq "" or $id eq $cmd-model:collectionRoot) then
      $collection//IsPartOf[. = $cmd-model:collectionRoot]/ancestor::CMD
    else
      (: referenced by name from the evaluated expression below :)
      let $handle-query := <term>{xdb:decode($id)}</term>
      return
        util:eval("$collection/ft:query(descendant::MdSelfLink, $handle-query)/ancestor::CMD")
};
190
(:
  Recurse down in collections.

  Produces a <c> element for $collection carrying its name, handle and
  proxy id. While $depth is not 0 the child collection records are fetched
  with cmd-model:get-children-colls() and processed recursively with
  $depth - 1; the parent's cnt is then the sum of the children's cnt.
  Without children, cnt comes from cmd-model:get-resource-count().
  An empty handle yields '-1' for the counts that need a handle.

  Robustness: the child's handle and proxy id are reduced to the string
  value of the first match (or "" when there is none), so a record without
  a matching ResourceProxy/@id or Header/MdSelfLink - or with several - no
  longer breaks the xs:string parameters of the recursive call. The debug
  log likewise tolerates zero or several MdSelfLink elements.

  @param $collection CMD record of the collection
  @param $name display name of the collection
  @param $handle handle (MdSelfLink) of the collection, may be ""
  @param $proxy-id id of the ResourceProxy in the parent pointing here, may be ""
  @param $depth remaining number of levels to descend
:)
declare function cmd-model:recurse-collections($collection as node(), $name as xs:string, $handle as xs:string, $proxy-id as xs:string, $depth as xs:integer) as item()* {
  let $children := if ($depth eq 0) then () else cmd-model:get-children-colls($collection)
  let $dummy := util:log('debug', fn:concat(cmd-model:get-md-collection-name($collection), " ", string-join($collection//MdSelfLink, " "), " ", xs:string($depth), " CHILDREN = ", string-join(for $child in $children return $child//MdSelfLink, "#")))
  (: identical in both branches below :)
  let $subcoll-count := if ($handle eq '') then '-1' else cmd-model:get-collection-count($handle)
  return
    if (fn:exists($children)) then
      let $child-results :=
        for $child in $children
        let $child-name := cmd-model:get-md-collection-name($child)
        let $child-handle := fn:string(($child//Header/MdSelfLink)[1])
        let $child-proxy-id := fn:string(($collection//ResourceProxy[ResourceRef = $child//MdSelfLink]/@id)[1])
        return
          cmd-model:recurse-collections($child, $child-name, $child-handle, $child-proxy-id, $depth - 1)
      return
        <c n="{$name}" handle="{$handle}" proxy-id="{$proxy-id}" cnt="{sum($child-results/@cnt)}" cnt_subcolls="{$subcoll-count}" cnt_children="{count($child-results)}" >{$child-results}</c>
    else
      <c n="{$name}" handle="{$handle}" proxy-id="{$proxy-id}" cnt_subcolls="{$subcoll-count}" cnt="{if ($handle eq '') then '-1' else cmd-model:get-resource-count($handle)}"></c>
};
216
(:
  Get the next level collection records (ResourceType = 'Metadata'):
  CMD records whose IsPartOf matches the handle (MdSelfLink) of the given
  parent record, found through a full-text query on IsPartOf.

  The handle is handed to ft:query() as a constructed <term> element bound
  to a variable instead of being spliced into the evaluated text, so markup
  characters or braces in a handle cannot break or alter the expression.
  Only the first MdSelfLink of the parent is used; several MdSelfLink
  elements previously made the string concatenation fail.

  @param $collection CMD record of the parent collection
  @return the child CMD records that contain a ResourceType 'Metadata'
:)
declare function cmd-model:get-children-colls($collection as node()) as node()* {
  (: referenced by name from the evaluated expression below :)
  let $handle-query := <term>{fn:string(($collection//MdSelfLink)[1])}</term>
  let $cmdi-collection := collection($cmd-model:cmdiMirrorPath)
  return
    util:eval("$cmdi-collection/ft:query(descendant::IsPartOf, $handle-query)/ancestor::CMD[descendant::ResourceType[. = 'Metadata']]")
};
227
(:
  Count ALL resource records (independent of any depth limit) that are
  part of the collection with the given handle. A record counts as a
  resource record when it has a ResourceType 'Resource' or has no
  ResourceType at all (there are records without such a ResourceProxy).
  The count is returned as a string.
:)
declare function cmd-model:get-resource-count($handle as xs:string) as xs:string {
  let $members := collection($cmd-model:cmdiMirrorPath)//IsPartOf[. eq $handle]/ancestor::CMD
  let $resource-records := $members[descendant::ResourceType[. = "Resource"] or empty(descendant::ResourceType)]
  return
    xs:string(count($resource-records))
};
234
(:
  Complement to cmd-model:get-resource-count(): count ALL collection
  records (independent of any depth limit), i.e. records with a
  ResourceType 'Metadata', that are part of the collection with the given
  handle. The count is returned as a string.
:)
declare function cmd-model:get-collection-count($handle as xs:string) as xs:string {
  let $members := collection($cmd-model:cmdiMirrorPath)//IsPartOf[. eq $handle]/ancestor::CMD
  let $collection-records := $members[descendant::ResourceType[. = "Metadata"]]
  return
    xs:string(count($collection-records))
};
241
(:
  Try to derive a name from the collection record, more or less agnostic
  about the actual schema: the first hit among a list of likely name and
  title elements is used, falling back to "UNKNOWN".
:)
declare function cmd-model:get-md-collection-name($collection-doc as node()) as xs:string {
  let $candidates := (
    $collection-doc//Corpus/Name,
    $collection-doc//Session/Name,
    $collection-doc//Collection/GeneralInfo/Name,
    $collection-doc//Collection/GeneralInfo/Title,
    $collection-doc//Name,
    $collection-doc//name,
    $collection-doc//Title,
    $collection-doc//title,
    "UNKNOWN"
  )
  return
    $candidates[1]
};
248
249(: ***********************
250HELPER function - dealing with caching the results
251:)
252
(:
  Compute the term summary for $type-name over $collections with
  cmd-model:elem() and store it under $name via cmd-model:store-result().
  The result is always recomputed; there is no check for newer data yet.
:)
declare function cmd-model:create-doc($collections as xs:string+, $type-name as xs:string, $depth as xs:integer, $name as xs:string) as xs:string* {
  let $summary := cmd-model:elem($collections, $type-name, $depth)
  return
    cmd-model:store-result($collections, $summary, $name, $depth)
};
261
(:
  Return the result document for $type-name over $collections at the given
  depth from $cmd-model:commonFreqsPath, creating it first with
  cmd-model:create-doc() when it is not available yet.
:)
declare function cmd-model:get-result-doc($collections as xs:string+, $type-name as xs:string, $depth as xs:integer) as item()* {
  let $name := cmd-model:make-compound-doc-name(($collections, $type-name), "values", xs:string($depth))
  (: bound only for its side effect: creates the document on a miss :)
  let $ensure-created :=
    if (fn:not(cmd-model:is-result-available($cmd-model:commonFreqsPath, $name))) then
      cmd-model:create-doc($collections, $type-name, $depth, $name)
    else
      ()
  return
    cmd-model:get-doc($cmd-model:commonFreqsPath, $name)
};
274
(:
  Generic get-doc(collection, docname): fetch the document named
  $doc-name from the database collection $collection.
:)
declare function cmd-model:get-doc($collection as xs:string, $doc-name as xs:string) as item()* {
  let $doc-uri := fn:concat($collection, "/", $doc-name)
  return
    fn:doc($doc-uri)
};
281
282
(:
  Tell whether the result document $result-ref is already available in
  the database collection $collection.
:)
declare function cmd-model:is-result-available($collection as xs:string, $result-ref as xs:string) as xs:boolean {
  let $result-uri := fn:concat($collection, "/", $result-ref)
  return
    fn:doc-available($result-uri)
};
289
(:
  Tell whether the document $doc-name is available in the database
  collection $collection.
:)
declare function cmd-model:is-doc-available($collection as xs:string, $doc-name as xs:string) as xs:boolean {
  let $doc-uri := fn:concat($collection, "/", $doc-name)
  return
    fn:doc-available($doc-uri)
};
296
(:
  Store the calculated frequencies for reuse.

  Logs in with the write credentials read from /db/clarin/writer.xml,
  wraps $entries in a document element (see
  cmd-model:make-doc-element-of-type()) and stores it in
  $cmd-model:commonFreqsPath. $type-name is used as the name of the stored
  document.

  @return the value returned by xdb:store()
:)
declare function cmd-model:store-result($coll-names as xs:string+, $entries as element()*, $type-name as xs:string, $depth as xs:integer) as xs:string {
  let $writer-config := fn:doc("/db/clarin/writer.xml")
  let $write-user := $writer-config//write-user/text()
  let $write-cred := $writer-config//write-user-cred/text()
  (: bound only for its side effect: authenticates before storing :)
  let $login := xdb:login($cmd-model:cmdiDatabaseURI, $write-user, $write-cred)
  let $doc-element := cmd-model:make-doc-element-of-type($type-name, $coll-names, $entries, xs:string($depth))
  return
    xdb:store($cmd-model:commonFreqsPath, $type-name, $doc-element)
};
308
(:
  Store the collection listing for the given collection.

  Logs in with the write credentials read from /db/clarin/writer.xml and
  stores $data as $doc-name in $collection-path.

  Previously the stored document was read back and returned through the
  xs:string? return type, i.e. the whole document was atomised into its
  string value. The function now returns the result of xdb:store()
  directly, which matches the declared return type and avoids the extra
  read.

  @param $data the listing to store
  @param $collection-path database collection to store into
  @param $doc-name name of the document to create
  @return the value returned by xdb:store() (empty if nothing was stored)
:)
declare function cmd-model:store-collection-data($data as node(), $collection-path as xs:string, $doc-name as xs:string) as xs:string? {
  let $clarin-writer := fn:doc("/db/clarin/writer.xml")
  (: bound only for its side effect: authenticates before storing :)
  let $dummy := xdb:login($cmd-model:cmdiDatabaseURI, $clarin-writer//write-user/text(), $clarin-writer//write-user-cred/text())
  return
    xdb:store($collection-path, $doc-name, $data)
};
325
(:
  Create the document name for a type: type name, depth and the XML
  extension, optionally prefixed with the collection path and a slash.
:)
declare function cmd-model:make-doc-name($coll-name as xs:string?, $type-name as xs:string, $depth as xs:string, $incl-path as xs:boolean) as xs:string {
  let $path-prefix := if ($incl-path) then fn:concat($coll-name, "/") else ""
  return
    fn:concat($path-prefix, $type-name, $depth, $cmd-model:xmlExt)
};
337
(:
  Create a document name for the selected collections (or types) for
  reuse: type name, depth, a dash, the MD5 hash of the sorted and
  concatenated names, and the XML extension. Sorting makes the name
  independent of the order in which the names are given.
:)
declare function cmd-model:make-compound-doc-name($coll-names as xs:string+, $type-name as xs:string, $depth as xs:string) as xs:string {
  let $sorted-names :=
    for $coll in $coll-names
    order by $coll ascending
    return $coll
  let $digest := util:hash(string-join($sorted-names, ""), "MD5")
  return
    fn:concat($type-name, $depth, "-", $digest, $cmd-model:xmlExt)
};
347
(:
  Create an element of the given type.

  The element is named $type-name, carries the three counts as attributes
  and $value as its text content.

  Fix: the attribute values previously referred to the undeclared
  variables $freq, $rank and $text-types; they now use the function's own
  parameters.

  @param $type-name name of the element to create
  @param $count value of the count attribute
  @param $text-count value of the text-count attribute
  @param $text-types-count value of the text-types-count attribute
  @param $value text content of the element
:)
declare function cmd-model:make-element-of-type($type-name as xs:string, $count as xs:string, $text-count as xs:string, $text-types-count as xs:string, $value as xs:string) as element() {
  element {$type-name} {
      attribute count {$count},
      attribute text-count {$text-count},
      attribute text-types-count {$text-types-count},
      text {$value}
  }
};
360
(:
  Create a document element of the given type.

  The element name comes from cmd-model:get-doc-type-element-name(). It
  carries a depth attribute, a colls attribute (comma-separated collection
  names, omitted when none are given), a created attribute with the
  current dateTime, and $entries as content.
:)
declare function cmd-model:make-doc-element-of-type($type-name as xs:string, $coll-names as xs:string*, $entries as element()*, $depth as xs:string) as element() {
  element {cmd-model:get-doc-type-element-name($type-name)} {
    attribute depth {$depth},
    if (fn:exists($coll-names)) then
      attribute colls {fn:string-join($coll-names, ",")}
    else
      (),
    attribute created {fn:current-dateTime()},
    $entries
  }
};
375
(:
  Get the element name for a document type. The type name is currently
  not consulted: the name is always $cmd-model:docTypeTerms.
:)
declare function cmd-model:get-doc-type-element-name($type-name as xs:string) as xs:string {
  let $element-name := $cmd-model:docTypeTerms
  return
    $element-name
};
382
(:
  Serialisation format.

  For the JSON format the exist:serialize option is switched to text
  output with media type application/json; the item itself is returned
  unchanged in every case (the XML-to-JSON conversion is not wired in).
  The XML and text formats need no option.
:)
declare function cmd-model:serialise-as($item as node()?, $format as xs:string) as item()? {
  let $wants-json := $format eq $cmd-model:responseFormatJSon
  (: bound only for its side effect on the serializer options :)
  let $option :=
    if ($wants-json) then
      util:declare-option("exist:serialize", "method=text media-type=application/json")
    else
      ()
  return
    $item
};
394
395
396(:
397{cmdComponent}   //{cmdComponent}        Actor   //Actor
398{cmdPath}.      //{cmdPath}/{cmdComponent}      Actor.Contact.Phone     //Actor/Contact/Phone
399{cmdIndex} {rel} {term}         //{cmdIndex}[\. {rel} '{term}']         Actors.Actor.Sex=f      //Actors/Actor/Sex[.='f']
400{cmdIndex} any {term}   //{cmdIndex}[contains(. '{term}')]      Organisation.Name any University        //Organisation/Name[contains(.,'University')]
401and, or, and not        ?!      Organisation.Name any University and Actor.gender=m     ?!
402
403//MDGroup[Actors/Actor/Role[.='sponsor'] and Actors/Actor/Name[contains(.,'a')]]
404//Title[starts-with(.,'a')]
405//Title[starts-with(.,'A')]
406//Title[contains(.,'analysis')]
407http://demo.spraakdata.gu.se/clarin/cmd/model/stats?operation=searchRetrieve&query=//Title[contains(.,'analysis')]&collection=
408
409<record>
410  <recordSchema>info:srw/schema/1/dc-v1.1</recordSchema>
411  <recordPacking>xml</recordPacking>
412  <recordData>
413    <srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-v1.1">
414     <dc:title>This is a Sample Record</dc:title>
415    </srw_dc:dc>
416  </recordData>
417  <recordPosition>1</recordPosition>
418  <extraRecordData>
419    <rel:score xmlns:rel="info:srw/extensions/2/rel-1.0">
420      0.965
    </rel:score>
422   </extraRecordData>
423</record>
424
425<searchRetrieveResponse>
426        <numberOfRecords>integer</numberOfRecords>
427        <echoedSearchRetrieveRequest>query itself (together with the context-collection) </echoedSearchRetrieveRequest>
428        <diagnostics>if necessary</diagnostics>
429        <records>
430                ....
431        </records>
432</searchRetrieveResponse>
433
434:)
Note: See TracBrowser for help on using the repository browser.