source: MDService/trunk/xquery/cmd-model.xqm @ 298

Last change on this file since 298 was 298, checked in by ljo, 14 years ago

First round of searchRetrieve results.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.4 KB
Line 
1module namespace cmd-model = "http://spraakbanken.gu.se/clarin/xquery/model";
2
3(:
4 $Id: cmd-model.xqm 298 2010-03-26 16:43:58Z ljo $
5:)
6
7import module namespace xdb="http://exist-db.org/xquery/xmldb";
8import module namespace util="http://exist-db.org/xquery/util";
9
(: Database URI handed to xdb:login before results are stored. :)
declare variable $cmd-model:cmdiDatabaseURI as xs:string :=
  "xmldb:exist:///db";

(: Collection paths: shared store for multi-collection results, and the CMDI mirror. :)
declare variable $cmd-model:commonFreqsPath as xs:string :=
  "/db/common/clarin/freqs";
declare variable $cmd-model:cmdiMirrorPath as xs:string :=
  "/db/cmdi-mirror";

(: Operation names matching the API functions at the end of this module. :)
declare variable $cmd-model:getCollections as xs:string :=
  "getCollections";
declare variable $cmd-model:queryModel as xs:string :=
  "queryModel";
declare variable $cmd-model:searchRetrieve as xs:string :=
  "searchRetrieve";

(: Predefined element paths for actor-related types. :)
declare variable $cmd-model:typeActorPath as xs:string :=
  "MDGroup/Actors/Actor";
declare variable $cmd-model:typeActorPath0 as xs:string :=
  "Actor";
declare variable $cmd-model:typeActorRolePath as xs:string :=
  "MDGroup/Actors/Actor/Role";

(: Root element name of stored result documents, and a document-type suffix. :)
declare variable $cmd-model:docTypeTerms as xs:string :=
  "Terms";
declare variable $cmd-model:docTypeSuffix as xs:string :=
  "Values";

(: Response format identifiers understood by cmd-model:serialise-as. :)
declare variable $cmd-model:responseFormatXml as xs:string :=
  "xml";
declare variable $cmd-model:responseFormatJSon as xs:string :=
  "json";
declare variable $cmd-model:responseFormatText as xs:string :=
  "text";

(: Name of the stored collection listing document. :)
declare variable $cmd-model:collectionDocName as xs:string :=
  "collection.xml";

(: File extension appended to generated document names. :)
declare variable $cmd-model:xmlExt as xs:string :=
  ".xml";
33
(:~
  Builds a Term element summarising the nodes matched by //$path inside
  the given collection.

  The element carries the searched path, a name taken from the last regex
  group that text:groups finds for "/([^/]+)$" in $path, the number of
  matched nodes, the number of descendant text nodes and the number of
  distinct text values. While $depth is greater than zero one nested Term
  is produced for every distinct child element name; once the depth is
  used up the element contains the text 'maxdepth' instead.

  NOTE(review): the query is assembled by string concatenation and run
  through util:eval, so $collection and $path must come from a trusted
  source.

  @param $collection collection URI to search
  @param $path element path, relative to //
  @param $depth number of further child levels to descend
  @return the Term element for $path
:)
declare function cmd-model:elem($collection as xs:string, $path as xs:string, $depth as xs:integer) as element() {
  let $query := fn:concat("collection('", $collection, "')//", $path),
      $matches := util:eval($query),
      $texts := $matches//text(),
      $child-names := distinct-values($matches/child::element()/name())[. != '']
  return
    element Term {
      attribute path { fn:concat("//", $path) },
      attribute name { text:groups($path, "/([^/]+)$")[last()] },
      attribute count { count($matches) },
      attribute count_text { count($texts) },
      attribute count_distinct_text { count(distinct-values($texts)) },
      if ($depth > 0) then
        for $child-name in $child-names
        return
          cmd-model:elem($collection, concat($path, '/', $child-name), $depth - 1)
      else
        'maxdepth'
    }
};
51
(:~
  Produces one Term element per item in $n. Each Term is named after the
  item and contains the result of util:node-xpath for the item's parent
  element, when it has one.

  @param $n the nodes to describe
  @return a sequence of Term elements, one per input node
:)
declare function cmd-model:paths($n) {
  for $node in $n
  let $parent-xpath := $node/parent::element()/util:node-xpath(.)
  return
    element Term {
      attribute name { $node/name() },
      $parent-xpath
    }
};
59
(:~
  Calls cmd-model:create-doc for $collection and for every collection
  below it.

  Child collections are processed before the document for $collection
  itself is created; the returned sequence nevertheless lists the result
  for $collection first, followed by the results of its children.

  @param $collection collection path to start from
  @param $type-name type (element path) passed on to cmd-model:create-doc
  @param $depth depth passed on unchanged to cmd-model:create-doc
  @return the values returned by cmd-model:create-doc, parent before children
:)
declare function cmd-model:recurse-collections-model($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
  let $sub-collections := xdb:get-child-collections($collection)
  return
    if (fn:empty($sub-collections)) then
      cmd-model:create-doc($collection, $type-name, $depth)
    else
      let $descendant-docs :=
            for $sub in $sub-collections
            let $sub-path := fn:concat($collection, '/', xs:string($sub))
            return
              cmd-model:recurse-collections-model($sub-path, $type-name, $depth)
      let $own-doc := cmd-model:create-doc($collection, $type-name, $depth)
      return ($own-doc, $descendant-docs)
};
76
(:~
  Lists $collection and all collections below it as Collection elements.

  Every Collection element contains the full collection path as text and
  has a name attribute taken from the last regex group that text:groups
  finds for "/([^/]+)$" in the path. Collections without children used to
  be emitted without the name attribute; they now get it as well, so all
  entries have the same shape.

  NOTE(review): $depth is only passed along to the recursive call and
  never limits the recursion - confirm whether a depth limit is intended.

  @param $collection collection path to start from
  @param $depth passed on unchanged to the recursive calls
  @return the Collection element for $collection followed by those of its descendants
:)
declare function cmd-model:recurse-collections($collection as xs:string, $depth as xs:integer) as item()* {
  let $name := text:groups($collection, "/([^/]+)$")[last()]
  return (
    <Collection name="{$name}">{$collection}</Collection>,
    (: An empty child list simply yields no further entries. :)
    for $child in xdb:get-child-collections($collection)
    return
      cmd-model:recurse-collections(fn:concat($collection, '/', xs:string($child)), $depth)
  )
};
93
(:~
  Computes the Term tree for $type-name in $collection with cmd-model:elem
  and stores it through cmd-model:store-result.

  The original author left a reminder here that the document should only
  be regenerated when newer data is available; no such check exists yet.

  @param $collection collection to analyse and to store the result in
  @param $type-name element path to analyse
  @param $depth number of child levels to include
  @return whatever cmd-model:store-result returns
:)
declare function cmd-model:create-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as xs:string* {
  let $term-tree := cmd-model:elem($collection, $type-name, $depth)
  return
    cmd-model:store-result($collection, $term-tree, $type-name, $depth)
};
102
(:~
  Returns the stored result document for $type-name at the given depth,
  creating it first when it is not available yet.

  The document is looked up as {$collection}/{base}{depth}.xml, where
  base is the last regex group that text:groups finds for "/(\w+)$" in
  $type-name, or $type-name itself when the pattern yields nothing.

  @param $collection collection holding the result document
  @param $type-name element path the result was computed for
  @param $depth depth the result was computed with
  @return the document found at the computed URI
:)
declare function cmd-model:get-result-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
  let $last-step := text:groups($type-name, "/(\w+)$")[last()]
  let $base-name := if (fn:exists($last-step)) then $last-step else $type-name
  let $result-ref := fn:concat("/", $base-name, xs:string($depth))
  (: Bound only for its side effect: creates the document when it is missing. :)
  let $created :=
        if (fn:not(cmd-model:is-result-available($collection, $result-ref))) then
          cmd-model:create-doc($collection, $type-name, $depth)
        else
          ()
  return
    fn:doc(fn:concat($collection, $result-ref, $cmd-model:xmlExt))
};
116
(:~
  Generic document getter: loads {$collection}/{$doc-name}.

  @param $collection collection path
  @param $doc-name document name inside the collection
  @return the result of fn:doc for the combined URI
:)
declare function cmd-model:get-doc($collection as xs:string, $doc-name as xs:string) as item()* {
  let $doc-uri := fn:concat($collection, "/", $doc-name)
  return
    fn:doc($doc-uri)
};
123
124
(:~
  Tells whether a result document is already available.

  The URI tested is $collection followed directly by $result-ref and the
  .xml extension, so $result-ref has to supply its own leading slash.

  @param $collection collection path
  @param $result-ref document reference without extension
  @return true when fn:doc-available finds the document
:)
declare function cmd-model:is-result-available($collection as xs:string, $result-ref as xs:string) as xs:boolean {
  let $result-uri := fn:concat($collection, $result-ref, $cmd-model:xmlExt)
  return
    fn:doc-available($result-uri)
};
131
(:~
  Tells whether the document {$collection}/{$doc-name} is available.

  @param $collection collection path
  @param $doc-name document name, including its extension
  @return true when fn:doc-available finds the document
:)
declare function cmd-model:is-doc-available($collection as xs:string, $doc-name as xs:string) as xs:boolean {
  let $doc-uri := fn:concat($collection, "/", $doc-name)
  return
    fn:doc-available($doc-uri)
};
138
(:~
  Stores the calculated frequencies for reuse.

  With more than one collection the result goes into the common
  collection ($cmd-model:commonFreqsPath) under a compound, hashed name.
  With a single collection it is stored inside that collection.

  Before storing, the function logs in with the user and credential read
  from /db/clarin/writer.xml.

  Changes compared with the earlier version:
  - $depth is converted to xs:string once and that string is used in both
    branches. The helpers make-compound-doc-name and
    make-doc-element-of-type declare $depth as xs:string, so passing the
    xs:integer directly, as the multi-collection branch did, is a type
    error.
  - The single-collection document name is built from the same base name
    that cmd-model:get-result-doc uses for its lookup: the last regex
    group of "/(\w+)$" in $type-name, or $type-name when there is none.
    Previously the full path was used, so a result for a path such as
    MDGroup/Actors/Actor was stored under a name that the lookup never
    asked for.

  NOTE(review): the compound name still uses the full $type-name; nothing
  visible in this module reads those documents back, so it was left as is.

  @param $coll-names one or more collection paths the result belongs to
  @param $entries the computed entries to store
  @param $type-name element path the entries were computed for
  @param $depth depth the entries were computed with
  @return the value returned by xdb:store
:)
declare function cmd-model:store-result($coll-names as xs:string+, $entries as element()*, $type-name as xs:string, $depth as xs:integer) as xs:string {
  let $clarin-writer := fn:doc("/db/clarin/writer.xml"),
      $login := xdb:login($cmd-model:cmdiDatabaseURI, $clarin-writer//write-user/text(), $clarin-writer//write-user-cred/text()),
      $depth-str := xs:string($depth),
      $last-step := text:groups($type-name, "/(\w+)$")[last()],
      $doc-base := if (fn:empty($last-step)) then $type-name else $last-step
  return
    if (fn:exists($coll-names[2])) then
      (: More than one collection is involved. :)
      xdb:store($cmd-model:commonFreqsPath,
                cmd-model:make-compound-doc-name($coll-names, $type-name, $depth-str),
                cmd-model:make-doc-element-of-type($type-name, $coll-names, $entries, $depth-str))
    else
      (: Only a single collection is involved. :)
      let $logged := util:log('debug', fn:concat('Stores ', $type-name, ' in ', $coll-names))
      return
        xdb:store($coll-names,
                  cmd-model:make-doc-name($coll-names, $doc-base, $depth-str, fn:false()),
                  cmd-model:make-doc-element-of-type($type-name, (), $entries, $depth-str))
};
157
(:~
  Stores the collection listing in the given collection under the name
  held in $cmd-model:collectionDocName.

  Logs in first with the user and credential read from
  /db/clarin/writer.xml. The original carried a commented-out util:catch
  wrapper around the store call; errors from xdb:store are not caught.

  @param $data the listing to store
  @param $collection-path collection that receives the document
  @return the value returned by xdb:store
:)
declare function cmd-model:store-collection-data($data as node(), $collection-path as xs:string) as xs:string? {
  let $credentials := fn:doc("/db/clarin/writer.xml")
  let $user := $credentials//write-user/text()
  let $password := $credentials//write-user-cred/text()
  let $login := xdb:login($cmd-model:cmdiDatabaseURI, $user, $password)
  return
    xdb:store($collection-path, $cmd-model:collectionDocName, $data)
};
167
(:~
  Creates the document name for a type: {$type-name}{$depth}.xml,
  optionally prefixed with the collection path and a slash.

  @param $coll-name collection path, used only when $incl-path is true
  @param $type-name type the document is for
  @param $depth depth, already converted to a string
  @param $incl-path whether to prepend {$coll-name}/
  @return the document name, with or without the collection path
:)
declare function cmd-model:make-doc-name($coll-name as xs:string?, $type-name as xs:string, $depth as xs:string, $incl-path as xs:boolean) as xs:string {
  let $prefix := if ($incl-path) then fn:concat($coll-name, "/") else ""
  return
    fn:concat($prefix, $type-name, $depth, $cmd-model:xmlExt)
};
179
(:~
  Creates a reusable document name for a selection of collections:
  {$type-name}{$depth}-{md5}.xml, where md5 is the MD5 hash of all
  collection names joined without a separator.

  NOTE(review): because the names are joined with an empty separator and
  in the given order, different selections can produce the same hash
  input and the same selection in a different order produces a different
  name - confirm whether that matters for reuse.

  @param $coll-names the selected collection names
  @param $type-name type the document is for
  @param $depth depth, already converted to a string
  @return the compound document name
:)
declare function cmd-model:make-compound-doc-name($coll-names as xs:string+, $type-name as xs:string, $depth as xs:string) as xs:string {
  let $digest := util:hash(fn:string-join($coll-names, ""), "MD5")
  return
    fn:concat($type-name, $depth, "-", $digest, $cmd-model:xmlExt)
};
188
(:~
  Creates an element of the given type.

  The element is named $type-name, carries the three counts as attributes
  and has $value as its text content.

  The attributes are now filled from the function's own parameters. The
  previous body referred to $freq, $rank and $text-types, none of which
  is declared anywhere in this function.

  @param $type-name name of the element to create
  @param $count value of the count attribute
  @param $text-count value of the text-count attribute
  @param $text-types-count value of the text-types-count attribute
  @param $value text content of the element
  @return the constructed element
:)
declare function cmd-model:make-element-of-type($type-name as xs:string, $count as xs:string, $text-count as xs:string, $text-types-count as xs:string, $value as xs:string) as element() {
  element {$type-name} {
      attribute count {$count},
      attribute text-count {$text-count},
      attribute text-types-count {$text-types-count},
      text {$value}
  }
};
201
(:~
  Creates a document element of the given type.

  The element name comes from cmd-model:get-doc-type-element-name. It
  gets a depth attribute, a colls attribute with the comma-separated
  collection names when any are given, a created attribute holding the
  current date and time, and the entries as content.

  @param $type-name type the document is for
  @param $coll-names collection names to record; may be empty
  @param $entries entries to place inside the document element
  @param $depth depth, already converted to a string
  @return the constructed document element
:)
declare function cmd-model:make-doc-element-of-type($type-name as xs:string, $coll-names as xs:string*, $entries as element()*, $depth as xs:string) as element() {
  element { cmd-model:get-doc-type-element-name($type-name) } {
    attribute depth { $depth },
    if (fn:exists($coll-names)) then
      attribute colls { fn:string-join($coll-names, ",") }
    else
      (),
    attribute created { fn:current-dateTime() },
    $entries
  }
};
216
(:~
  Creates the element name for a document of the given type.

  The type name is not consulted at present: the value of
  $cmd-model:docTypeTerms is returned for every type.

  @param $type-name type the document is for (currently unused)
  @return the document element name
:)
declare function cmd-model:get-doc-type-element-name($type-name as xs:string) as xs:string {
  let $root-name := $cmd-model:docTypeTerms
  return
    $root-name
};
223
224
225
226
(:~
  Selects the serialisation format for a response.

  For the JSON format the exist:serialize option is set to text output
  with media type application/json. The item itself is returned
  unchanged in every case; the call to json:xml-to-json that would
  convert it was left commented out by the original author.

  @param $item the node to return
  @param $format one of the response format identifiers
  @return $item, unchanged
:)
declare function cmd-model:serialise-as($item as node()?, $format as xs:string) as item()? {
  let $wants-json := $format eq $cmd-model:responseFormatJSon
  (: Bound only for its side effect on the serialisation options. :)
  let $option :=
        if ($wants-json) then
          util:declare-option("exist:serialize", "method=text media-type=application/json")
        else
          ()
  return
    $item
};
238
239
(:~
  API function queryModel.

  Fetches the result document for $cmd-index-path through
  cmd-model:get-result-doc and hands it to cmd-model:serialise-as.

  NOTE(review): $collection is declared as xs:string+ here while
  cmd-model:get-result-doc accepts a single xs:string - confirm how
  several collections are meant to be handled.

  @param $cmd-index-path element path to report on
  @param $collection collection(s) to report on
  @param $format response format identifier
  @param $max-depth number of child levels to include
  @return the result document as returned by cmd-model:serialise-as
:)
declare function cmd-model:query-model($cmd-index-path as xs:string, $collection as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
  let $result-doc := cmd-model:get-result-doc($collection, $cmd-index-path, $max-depth)
  return
    cmd-model:serialise-as($result-doc, $format)
};
246
(:~
  API function getCollections.

  When no collection listing document exists yet, one is built with
  cmd-model:recurse-collections for every given collection and stored via
  cmd-model:store-collection-data. The stored listing is then loaded and
  passed through cmd-model:serialise-as.

  Open issue carried over from the original author: the helpers used for
  the availability check, for storing and for loading accept a single
  collection only, although $collections may hold several.

  @param $collections root collection(s) to list
  @param $format response format identifier
  @param $max-depth depth passed to cmd-model:recurse-collections
  @return the collection listing as returned by cmd-model:serialise-as
:)
declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
  (: Bound only for its side effect: builds and stores the listing when it is missing. :)
  let $stored :=
        if (fn:not(cmd-model:is-doc-available($collections, $cmd-model:collectionDocName))) then
          let $listing :=
                for $root in $collections
                return
                  cmd-model:recurse-collections($root, $max-depth)
          let $listing-element :=
                <Collections count="{count($listing)}" root="{$collections}">{$listing}</Collections>
          return
            cmd-model:store-collection-data($listing-element, $collections)
        else
          ()
  let $listing-doc := cmd-model:get-doc($collections, $cmd-model:collectionDocName)
  return
    cmd-model:serialise-as($listing-doc, $format)
};
263
(:~
  API function searchRetrieve.

  Evaluates the decoded query against the decoded collection, takes the
  CMD ancestors of the matches and wraps the requested slice of them in a
  searchRetrieveResponse element, which is passed through
  cmd-model:serialise-as.

  The slice now covers positions $start-item to $end-item inclusive.
  fn:subsequence expects a length as its third argument, so passing
  $end-item directly returned $end-item records from $start-item on, which
  is too many whenever $start-item is greater than 1. An $end-item below
  $start-item yields an empty slice.

  NOTE(review): the query string is assembled by concatenation and run
  through util:eval; $cql-query and $collection reach the evaluator
  unchecked, so callers must not pass untrusted input without validation.

  NOTE(review): $collection is declared as xs:string+ but is used where a
  single string is expected - confirm how several collections are meant
  to be handled.

  @param $cql-query encoded query fragment appended to the collection call
  @param $collection encoded collection to search
  @param $format response format identifier
  @param $start-item position of the first record to return (1-based)
  @param $end-item position of the last record to return
  @return the response element as returned by cmd-model:serialise-as
:)
declare function cmd-model:search-retrieve($cql-query as xs:string, $collection as xs:string+, $format as xs:string, $start-item as xs:integer, $end-item as xs:integer) as item()* {
  let $results := util:eval(fn:concat("collection('", xdb:decode($collection), "')", xdb:decode($cql-query), "/ancestor::CMD")),
    $result-count := fn:count($results),
    (: Convert the inclusive end position into the length fn:subsequence expects. :)
    $slice-length := $end-item - $start-item + 1,
    $result-seq := fn:subsequence($results, $start-item, $slice-length),
    $seq-count := fn:count($result-seq),
    $result-fragment :=
    <searchRetrieveResponse>
      <numberOfRecords>{$result-count}</numberOfRecords>
      <echoedSearchRetrieveRequest>{$cql-query, $collection, $start-item, $end-item}</echoedSearchRetrieveRequest>
      <diagnostics>{$seq-count}</diagnostics>
      <records>
        {$result-seq}
      </records>
    </searchRetrieveResponse>

    return
        cmd-model:serialise-as($result-fragment, $format)

};
286(:
287{cmdComponent}   //{cmdComponent}        Actor   //Actor
288{cmdPath}.      //{cmdPath}/{cmdComponent}      Actor.Contact.Phone     //Actor/Contact/Phone
289{cmdIndex} {rel} {term}         //{cmdIndex}[\. {rel} '{term}']         Actors.Actor.Sex=f      //Actors/Actor/Sex[.='f']
290{cmdIndex} any {term}   //{cmdIndex}[contains(., '{term}')]      Organisation.Name any University        //Organisation/Name[contains(.,'University')]
291and, or, and not        ?!      Organisation.Name any University and Actor.gender=m     ?!
292
293//MDGroup[Actors/Actor/Role[.='sponsor'] and Actors/Actor/Name[contains(.,'a')]]
294//Title[starts-with(.,'a')]
295//Title[starts-with(.,'A')]
296//Title[contains(.,'analysis')]
297http://demo.spraakdata.gu.se/clarin/cmd/model/stats?operation=searchRetrieve&query=//Title[contains(.,'analysis')]&collection=
298
299<record>
300  <recordSchema>info:srw/schema/1/dc-v1.1</recordSchema>
301  <recordPacking>xml</recordPacking>
302  <recordData>
303    <srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-v1.1">
304     <dc:title>This is a Sample Record</dc:title>
305    </srw_dc:dc>
306  </recordData>
307  <recordPosition>1</recordPosition>
308  <extraRecordData>
309    <rel:score xmlns:rel="info:srw/extensions/2/rel-1.0">
310      0.965
311    </rel:score>
312   </extraRecordData>
313</record>
314
315<searchRetrieveResponse>
316        <numberOfRecords>integer</numberOfRecords>
317        <echoedSearchRetrieveRequest>query itself (together with the context-collection) </echoedSearchRetrieveRequest>
318        <diagnostics>if necessary</diagnostics>
319        <records>
320                ....
321        </records>
322</searchRetrieveResponse>
323
324:)
Note: See TracBrowser for help on using the repository browser.