source: MDRepository/trunk/xquery/cmd-model.xqm @ 834

Last change on this file since 834 was 834, checked in by ljo, 14 years ago

cmd-model.xqm - no need to rewrite to predicate for * etc.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 14.3 KB
Line 
1module namespace cmd-model = "http://spraakbanken.gu.se/clarin/xquery/model";
2
3(:
4 $Id: cmd-model.xqm 834 2010-11-03 09:45:24Z ljo $
5:)
6
7import module namespace xdb="http://exist-db.org/xquery/xmldb";
8import module namespace util="http://exist-db.org/xquery/util";
9
10declare variable $cmd-model:cmdiDatabaseURI as xs:string := "xmldb:exist:///db";
11
12declare variable $cmd-model:cmdiMirrorPath as xs:string := "/db/cmdi-mirror";
13declare variable $cmd-model:cachePath as xs:string := "/db/cache";
14
15declare variable $cmd-model:getCollections as xs:string := "getCollections";
16declare variable $cmd-model:queryModel as xs:string := "queryModel";
17declare variable $cmd-model:searchRetrieve as xs:string := "searchRetrieve";
18
19declare variable $cmd-model:typeActorPath as xs:string := "MDGroup/Actors/Actor";
20declare variable $cmd-model:typeActorPath0 as xs:string := "Actor";
21declare variable $cmd-model:typeActorRolePath as xs:string := "MDGroup/Actors/Actor/Role";
22
23declare variable $cmd-model:docTypeTerms as xs:string := "Terms";
24declare variable $cmd-model:docTypeSuffix as xs:string := "Values";
25
26declare variable $cmd-model:responseFormatXml as xs:string := "xml";
27declare variable $cmd-model:responseFormatJSon as xs:string := "json";
28declare variable $cmd-model:responseFormatText as xs:string := "text";
29
30declare variable $cmd-model:collectionDocName as xs:string := "collection.xml";
31
32declare variable $cmd-model:collectionRoot as xs:string := "root";
33
34declare variable $cmd-model:xmlExt as xs:string := ".xml";
35
36declare variable $cmd-model:valuesLimit as xs:integer := 100;
37
38
39(:~
40  API function queryModel.
41:)
42declare function cmd-model:query-model($cmd-index-path as xs:string, $collection as xs:string+, $format as xs:string, $max-depth as xs:integer) as item()? {
43       
44  let $name := cmd-model:gen-cache-id("model", ($collection, $cmd-index-path), xs:string($max-depth)),
45    $doc := 
46    if (cmd-model:is-in-cache($name)) then
47      cmd-model:get-from-cache($name)
48    else
49      let $data := cmd-model:elem($collection, $cmd-index-path, $max-depth)
50        return cmd-model:store-in-cache($name, $data)
51  return 
52    cmd-model:serialise-as($doc, $format)       
53};
54
55(:~
56  API function getCollections.
57:)
58declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
59  let $name := cmd-model:gen-cache-id("collection", $collections, xs:string($max-depth)),
60    $doc := 
61    if (cmd-model:is-in-cache($name)) then
62       cmd-model:get-from-cache($name)
63    else
64      let $data := cmd-model:colls($collections, $max-depth)
65        return cmd-model:store-in-cache($name, $data)
66  return 
67    cmd-model:serialise-as($doc, $format)
68};
69
70(:~
71  API function searchRetrieve.
72:)
73declare function cmd-model:search-retrieve($xpath-query as xs:string, $collections as xs:string+, $format as xs:string, $start-item as xs:integer, $end-item as xs:integer) as item()* {
74  let $start-time := util:system-dateTime(),
75    $collection := collection($cmd-model:cmdiMirrorPath),
76    $decoded-query := xdb:decode($xpath-query),
77    $sanitized-query := cmd-model:sanitize-query($decoded-query),
78    $results :=
79    if ($collections[1] eq $cmd-model:collectionRoot) then
80      util:eval(fn:concat("$collection", $sanitized-query, "/ancestor-or-self::CMD"))
81    else
82      for $coll in $collections return util:eval(fn:concat("$collection/ft:query(descendant::IsPartOf, <term>", xdb:decode($coll) ,"</term>)/ancestor-or-self::CMD", $sanitized-query))
83
84    let $result-count := fn:count($results),
85    $result-seq := fn:subsequence($results, $start-item, $end-item),
86    $seq-count := fn:count($result-seq),
87    $end-time := util:system-dateTime(),
88    $result-fragment :=
89    <searchRetrieveResponse>
90      <numberOfRecords>{$result-count}</numberOfRecords>
91      <echoedSearchRetrieveRequest>{if ($decoded-query ne $sanitized-query) then concat("Rewritten to '", $sanitized-query, "'") else $xpath-query, $collections, $start-item, $end-item}</echoedSearchRetrieveRequest>
92      <extraResponseData>
93        <returnedRecords>{$seq-count}</returnedRecords>
94        <duration>{$end-time - $start-time}</duration>
95      </extraResponseData>
96      <records>
97        {$result-seq}
98      </records>
99    </searchRetrieveResponse>
100
101    return
102        cmd-model:serialise-as($result-fragment, $format)
103
104};
105
106(:
107  **********************
108  queryModel - subfunctions
109:)
110
111declare function cmd-model:sanitize-query($query as xs:string) as xs:string {
112let $last-segment := text:groups($query, "/([^/]+)$")[last()]
113return 
114  if ($query = ("//*", "descendant::element()")) then 
115    "" 
116  else if ($last-segment = ("Title", "Name", "Role", "Genre", "Country", "Continent", "MdSelfLink", "IsPartOf")) then
117    (: concat("ft:query(",:) if ($query eq concat("//", $last-segment)) then concat("[descendant::", $last-segment, "]") else concat("[", $query, "]") (:, ", <regex>.*</regex>)") :)
118  else $query
119};
120
121declare function cmd-model:elem($collections as xs:string+, $path as xs:string, $depth as xs:integer) as element() {
122  let $collection := collection($cmd-model:cmdiMirrorPath),
123    $path-nodes :=
124    if ($collections[1] eq $cmd-model:collectionRoot) then
125      util:eval(fn:concat("$collection/descendant-or-self::", $path))
126    else
127      for $coll in $collections
128      return
129        util:eval(fn:concat("$collection/ft:query(descendant::IsPartOf, <query><term>", xdb:decode($coll), "</term></query>)/ancestor-or-self::CMD/descendant-or-self::", $path))
130   
131                let $entries := cmd-model:elem-r($path-nodes, $path, $depth, $depth),
132                        $coll-names-value := if (fn:empty($collections)) then () else attribute colls {fn:string-join($collections, ",")},
133                $result := element {$cmd-model:docTypeTerms} {
134                  $coll-names-value,
135                  attribute depth {$depth}, 
136                  attribute created {fn:current-dateTime()},
137                  $entries 
138                }
139    return $result             
140};
141
142declare function cmd-model:elem-r($path-nodes as node()*, $path as xs:string, $max-depth as xs:integer, $depth as xs:integer) as element() {
143      let $path-count := count($path-nodes),
144        $child-elements := $path-nodes/child::element(),
145        $subs := distinct-values($child-elements/name()),
146        $nodes-child-terminal := if (empty($child-elements)) then $path-nodes else () (: Maybe some selected elements $child-elements[not(element())] later on :),
147        $text-nodes := $nodes-child-terminal/text(),
148        $text-count := count($text-nodes),
149        $text-count-distinct := count(distinct-values($text-nodes))
150        return 
151(:      <Term path="{fn:concat("//", $path)}" name="{text:groups($path, "/([^/]+)$")[last()]}" count="{$path-count}" count_text="{$text-count}"  count_distinct_text="{$text-count-distinct}">{ :)
152        <Term path="{fn:concat("//", $path)}" name="{(text:groups($path, "/([^/]+)$")[last()],$path)[1] }" count="{$path-count}" count_text="{$text-count}"  count_distinct_text="{$text-count-distinct}">{
153          if ($depth > 0) then
154            (for $elname in $subs[. != '']
155            return
156              cmd-model:elem-r(util:eval(concat("$path-nodes/", $elname)), concat($path, '/', $elname), $max-depth, $depth - 1),
157              if ($max-depth eq 1 and $text-count gt 0) then cmd-model:values($path-nodes) else ())
158          else 'maxdepth'
159        }</Term>
160};
161
162declare function cmd-model:values($nodes as node()*) as node()* {
163let $keys := distinct-values($nodes/text())
164let $values := for $key at $pos in $keys
165  let $kcount := count($nodes[. eq $key])
166    order by lower-case($key) ascending
167    return <v key="{$key}" cnt="{$kcount}" />
168return
169  if ($cmd-model:valuesLimit eq 0) then $values
170  else
171  subsequence($values, 1, $cmd-model:valuesLimit)
172};
173
174declare function cmd-model:paths($n) {
175        for $el in $n
176        return <Term name="{$el/name()}"> {
177        for $anc in $el/parent::element()
178        return util:node-xpath($anc)
179        }</Term>
180};
181
182
183(:
184  **********************
185  getCollections - subfunctions
186:)
187
188declare function cmd-model:colls($collections as xs:string+, $max-depth as xs:integer) as element() {
189                let $children := 
190                for $collection-item in $collections
191                        return
192                                for $collection-doc in cmd-model:get-resource-by-handle($collection-item) 
193                                return cmd-model:colls-r($collection-doc, cmd-model:get-md-collection-name($collection-doc), $collection-doc//MdSelfLink, "", $max-depth)
194                let $res-count := sum($children/@cnt)
195                let $coll-count := sum($children/@cnt_subcolls) + count($children) 
196                let $data := <Collections cnt="{$res-count}" cnt_subcolls="{$coll-count}" cnt_children="{count($children)}" root="{$collections}">{$children}</Collections>                     
197                return $data
198};
199
200(:
201  Recurse down in collections.
202:)
203declare function cmd-model:colls-r($collection as node(), $name as xs:string, $handle as xs:string, $proxy-id as xs:string, $depth as xs:integer) as item()* {
204  let $children :=  if ($depth eq 1) then () else cmd-model:get-children-colls($collection)
205  (: let $dummy := util:log('debug', fn:concat(cmd-model:get-md-collection-name($collection), " ", $collection//MdSelfLink, " ", xs:string($depth), " CHILDREN = ", string-join(for $child in $children return $child//MdSelfLink, "#"))) :)
206    return
207      if (fn:exists($children)) then
208        let $child-results :=
209          for $child in $children
210            (: let $child-doc := if (empty($child/unresolvable-uri)) then
211                cmd-model:get-resource-by-handle($child/ResourceRef) else (), :)
212            let $child-name := cmd-model:get-md-collection-name($child)
213            let $proxyid := ($collection//ResourceProxy[ResourceRef = $child//MdSelfLink]/@id, concat("UNKNOWN proxy id:", $child//MdSelfLink))[1] 
214            return
215              cmd-model:colls-r($child, $child-name, $child//Header/MdSelfLink, $proxyid, $depth - 1)
216
217          return
218          <c n="{$name}" handle="{$handle}" proxy-id="{$proxy-id}" cnt="{sum($child-results/@cnt)}" cnt_subcolls="{if ($handle eq '') then '-1' else cmd-model:get-collection-count($handle)}" cnt_children="{count($child-results)}" >{$child-results}</c>
219      else
220        <c n="{$name}" handle="{$handle}" proxy-id="{$proxy-id}" cnt_subcolls="{if ($handle eq '') then '-1' else cmd-model:get-collection-count($handle)}" cnt="{if ($handle eq '') then '-1' else cmd-model:get-resource-count($handle)}"></c>
221
222};
223
224(:
225  Get the MD resource by handle.
226:)
227declare function cmd-model:get-resource-by-handle($id as xs:string) as node()* {
228  let $collection := collection($cmd-model:cmdiMirrorPath)
229  return 
230    if ($id eq "" or $id eq $cmd-model:collectionRoot) then
231    $collection//IsPartOf[. = $cmd-model:collectionRoot]/ancestor::CMD
232  else
233    util:eval(concat("$collection/ft:query(descendant::MdSelfLink, <term>", xdb:decode($id), "</term>)/ancestor::CMD"))
234 (: $collection/descendant::MdSelfLink[. = xdb:decode($id)]/ancestor::CMD :)
235};
236
237(:
238  Get the next level collection-records (ResourceType='Metadata')
239  rely on the ResourceProxy of the parent (param)
240:)
241declare function cmd-model:get-children-colls($collection as node()) as node()* {
242  let $handle := $collection//MdSelfLink/text(),
243    $cmdi-collection := collection($cmd-model:cmdiMirrorPath)
244  return util:eval(concat("$cmdi-collection/ft:query(descendant::IsPartOf, <term>", $handle, "</term>)/ancestor::CMD[descendant::ResourceType[. = 'Metadata']]"))
245    (: collection($cmd-model:cmdiMirrorPath)/descendant::IsPartOf[. eq $handle]/ancestor::CMD[descendant::ResourceType[. = "Metadata"]] :)
246};
247
248(:
249  count ALL (independent of maxDepth) resource-records (ie actually ResourceType=Resource, but
250  there are records without ResourceProxy[ResourceType=Resource] -
251  so care for that (not(exists((ResourceType))))
252:)
253declare function cmd-model:get-resource-count($handle as xs:string) as xs:string {
254        xs:string(count(collection($cmd-model:cmdiMirrorPath)//IsPartOf[. eq $handle]/ancestor::CMD[descendant::ResourceType[. = "Resource"] or not(exists(descendant::ResourceType)) ]))
255};
256
257(:
258  This is complement to cmd-model:get-resource-count()
259  count ALL (independent of maxDepth) collection-records
260  (ie ResourceType=Metadata)                   
261:)
262declare function cmd-model:get-collection-count($handle as xs:string) as xs:string {
263        xs:string(count(collection($cmd-model:cmdiMirrorPath)//IsPartOf[. eq $handle]/ancestor::CMD[descendant::ResourceType[. = "Metadata"]]))
264};
265
266(:
267  Try to derive a name from the collection-record (more-or-less agnostic about
268  the actual schema.
269:)
270declare function cmd-model:get-md-collection-name($collection-doc as node()) as xs:string {
271($collection-doc//Corpus/Name, $collection-doc//Session/Name, $collection-doc//Collection/GeneralInfo/Name, $collection-doc//Collection/GeneralInfo/Title, $collection-doc//Name, $collection-doc//name, $collection-doc//Title, $collection-doc//title, "UNKNOWN")[1]
272};
273
274(:
275  ***********************
276  HELPER function - dealing with caching the results
277:)
278
279
280(:
281  Function for telling wether the document is available or not.
282  generic, currently not used
283:)
284declare function cmd-model:is-doc-available($collection as xs:string, $doc-name as xs:string) as xs:boolean {
285  fn:doc-available(fn:concat($collection, "/", $doc-name))
286};
287
288declare function cmd-model:is-in-cache($doc-name as xs:string) as xs:boolean {
289  fn:doc-available(fn:concat($cmd-model:cachePath, "/", $doc-name))
290};
291
292declare function cmd-model:get-from-cache($doc-name as xs:string) as item()* {
293      fn:doc(fn:concat($cmd-model:cachePath, "/", $doc-name))
294};
295
296(: 
297  Store the collection listing for given collection.
298:)
299declare function cmd-model:store-in-cache($doc-name as xs:string, $data as node()) as item()* {
300  let $clarin-writer := fn:doc("/db/clarin/writer.xml"),
301  $dummy := xdb:login($cmd-model:cachePath, $clarin-writer//write-user/text(), $clarin-writer//write-user-cred/text())
302  let $store := (: util:catch("org.exist.xquery.XPathException", :) xdb:store($cmd-model:cachePath, $doc-name, $data), (: , ()) :)
303  $stored-doc := fn:doc(concat($cmd-model:cachePath, "/", $doc-name))
304  return $stored-doc
305};
306
307(:
308  Create document name with md5-hash for selected collections (or types)
309  for reuse.
310:)
311declare function cmd-model:gen-cache-id($type-name as xs:string, $keys as xs:string+, $depth as xs:string) as xs:string {
312  let $name-prefix := fn:concat($type-name, $depth),
313    $sorted-names := for $key in $keys order by $key ascending return $key
314    return
315    fn:concat($name-prefix, "-", util:hash(string-join($sorted-names, ""), "MD5"), $cmd-model:xmlExt)
316};
317
318(:
319  Seraliseringsformat.
320:)
321declare function cmd-model:serialise-as($item as node()?, $format as xs:string) as item()? {
322      if ($format eq $cmd-model:responseFormatJSon) then
323        let $option := util:declare-option("exist:serialize", "method=text media-type=application/json")
324          return
325           (: json:xml-to-json($item) :) $item
326      else (: $cmd-model:responseFormatXml, $cmd-model:responseFormatText:)
327        $item
328};
Note: See TracBrowser for help on using the repository browser.