1 | module namespace cmd-model = "http://spraakbanken.gu.se/clarin/xquery/model"; |
---|
2 | |
---|
3 | (: |
---|
4 | $Id: cmd-model.xqm 301 2010-03-29 13:32:22Z ljo $ |
---|
5 | :) |
---|
6 | |
---|
7 | import module namespace xdb="http://exist-db.org/xquery/xmldb"; |
---|
8 | import module namespace util="http://exist-db.org/xquery/util"; |
---|
9 | |
---|
(: Database URI handed to xdb:login before documents are stored. :)
declare variable $cmd-model:cmdiDatabaseURI as xs:string := "xmldb:exist:///db";

(: Collection that receives results computed over more than one collection (see store-result). :)
declare variable $cmd-model:commonFreqsPath as xs:string := "/db/common/clarin/freqs";
(: Not referenced in the visible part of this module; presumably the root of the mirrored CMDI data - TODO confirm. :)
declare variable $cmd-model:cmdiMirrorPath as xs:string := "/db/cmdi-mirror";

(: Operation names; not referenced in the visible part of this module, presumably matched against the request's operation parameter by the caller - TODO confirm. :)
declare variable $cmd-model:getCollections as xs:string := "getCollections";
declare variable $cmd-model:queryModel as xs:string := "queryModel";
declare variable $cmd-model:searchRetrieve as xs:string := "searchRetrieve";

(: Element paths for actor metadata; not referenced in the visible part of this module. :)
declare variable $cmd-model:typeActorPath as xs:string := "MDGroup/Actors/Actor";
declare variable $cmd-model:typeActorPath0 as xs:string := "Actor";
declare variable $cmd-model:typeActorRolePath as xs:string := "MDGroup/Actors/Actor/Role";

(: Name of the document element of stored result documents (returned by get-doc-type-element-name). :)
declare variable $cmd-model:docTypeTerms as xs:string := "Terms";
(: Not referenced in the visible part of this module. :)
declare variable $cmd-model:docTypeSuffix as xs:string := "Values";

(: Response format identifiers; serialise-as only treats the JSON value specially. :)
declare variable $cmd-model:responseFormatXml as xs:string := "xml";
declare variable $cmd-model:responseFormatJSon as xs:string := "json";
declare variable $cmd-model:responseFormatText as xs:string := "text";

(: Name of the document in which a collection listing is stored. :)
declare variable $cmd-model:collectionDocName as xs:string := "collection.xml";

(: File extension appended to generated document names. :)
declare variable $cmd-model:xmlExt as xs:string := ".xml";
---|
33 | |
---|
(:
  Build a <Term> element summarising the nodes found at //$path in $collection.

  Attributes of the returned element:
    path                - "//" followed by $path
    name                - last step of $path (the whole $path when it has no "/")
    count               - number of nodes matched by the path
    count_text          - number of descendant text nodes of the matches
    count_distinct_text - number of distinct values among those text nodes

  While $depth is greater than 0 the element contains one nested <Term> per
  distinct, non-empty child element name, computed recursively with
  $depth - 1. When $depth reaches 0 the content is the literal text 'maxdepth'.
:)
declare function cmd-model:elem($collection as xs:string, $path as xs:string, $depth as xs:integer) as element() {
    (: NOTE(review): the query string is assembled by concatenation and run
       through util:eval, so $collection and $path must come from trusted input. :)
    let $path-nodes := util:eval(fn:concat("collection('", $collection, "')//", $path))
    let $path-count := count($path-nodes)

    let $subs := distinct-values($path-nodes/child::element()/name())
    let $text-nodes := $path-nodes//text()
    let $text-count := count($text-nodes)
    let $text-count-distinct := count(distinct-values($text-nodes))

    (: text:groups yields nothing for a path without "/", which used to give
       an empty name attribute; fall back to the path itself, the same way
       get-result-doc derives its document name. :)
    let $last-step := text:groups($path, "/([^/]+)$")[last()]
    let $term-name := if (fn:empty($last-step)) then $path else $last-step
    return
        <Term path="{fn:concat("//", $path)}" name="{$term-name}" count="{$path-count}" count_text="{$text-count}" count_distinct_text="{$text-count-distinct}">{
            if ($depth > 0) then
                for $elname in $subs[. != '']
                return
                    cmd-model:elem($collection, concat($path, '/', $elname), $depth - 1)
            else 'maxdepth'
        }</Term>
};
---|
51 | |
---|
(:
  For every node in $n, return a <Term> element whose name attribute is the
  node's name and whose content is the util:node-xpath value of the node's
  parent element (empty when the node has no parent element).
:)
declare function cmd-model:paths($n) {
    for $node in $n
    let $parent-xpaths :=
        (
            for $parent in $node/parent::element()
            return util:node-xpath($parent)
        )
    return <Term name="{$node/name()}"> {$parent-xpaths}</Term>
};
---|
59 | |
---|
(:
  Run create-doc for $collection and for every collection below it.

  The child collections are processed first (recursively, with the same
  $type-name and $depth), then the collection itself. The returned sequence
  lists the result for $collection first, followed by the results of its
  descendants.
:)
declare function cmd-model:recurse-collections-model($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
    let $sub-collections := xdb:get-child-collections($collection)
    return
        if (fn:empty($sub-collections)) then
            (: Leaf collection: only its own document is created. :)
            cmd-model:create-doc($collection, $type-name, $depth)
        else
            let $descendant-docs :=
                (
                    for $sub in $sub-collections
                    let $sub-path := fn:concat($collection, '/', xs:string($sub))
                    return cmd-model:recurse-collections-model($sub-path, $type-name, $depth)
                )
            let $own-doc := cmd-model:create-doc($collection, $type-name, $depth)
            return ($own-doc, $descendant-docs)
};
---|
76 | |
---|
(:
  List $collection and all collections below it.

  Returns one <Collection> element per collection, the given collection
  first, followed by the elements of its descendants. Each element carries
  the collection path as text and the last path step in its name attribute.
  Leaf collections now get the name attribute as well; previously only
  collections with children had it.

  NOTE(review): $depth is passed down unchanged and never tested, so it does
  not limit the recursion.
:)
declare function cmd-model:recurse-collections($collection as xs:string, $depth as xs:integer) as item()* {
    let $children := xdb:get-child-collections($collection)
    let $child-results :=
        (
            (: Empty for a leaf collection, so only $current is returned. :)
            for $child in $children
            return
                cmd-model:recurse-collections(concat($collection, '/', xs:string($child)), $depth)
        )
    let $current := <Collection name="{text:groups($collection, "/([^/]+)$")[last()]}">{$collection}</Collection>
    return ($current, $child-results)
};
---|
93 | |
---|
(:
  Compute the term tree for $type-name in $collection (cmd-model:elem) and
  store it through cmd-model:store-result, returning what store-result
  returns.

  No check for existing or newer data is made here: every call recomputes
  and stores the document.
:)
declare function cmd-model:create-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as xs:string* {
    let $term-tree := cmd-model:elem($collection, $type-name, $depth)
    return cmd-model:store-result($collection, $term-tree, $type-name, $depth)
};
---|
102 | |
---|
(:
  Return the stored result document for $type-name at $depth in $collection,
  creating it first via create-doc when it is not yet available.

  The document is looked up as
    $collection / <name><depth>.xml
  where <name> is the last "/"-separated step of $type-name if it consists
  of word characters, and $type-name itself otherwise.
:)
declare function cmd-model:get-result-doc($collection as xs:string, $type-name as xs:string, $depth as xs:integer) as item()* {
    let $trailing-step := text:groups($type-name, "/(\w+)$")[last()]
    let $base-name := if (fn:exists($trailing-step)) then $trailing-step else $type-name
    let $doc-stem := fn:concat("/", $base-name, xs:string($depth))
    (: Bound only for its side effect: the document is generated when missing. :)
    let $ensure-stored :=
        if (fn:not(cmd-model:is-result-available($collection, $doc-stem))) then
            cmd-model:create-doc($collection, $type-name, $depth)
        else
            ()
    return
        fn:doc(fn:concat($collection, $doc-stem, $cmd-model:xmlExt))
};
---|
116 | |
---|
(:
  Generic document lookup: returns fn:doc for "$collection/$doc-name".
:)
declare function cmd-model:get-doc($collection as xs:string, $doc-name as xs:string) as item()* {
    let $doc-uri := fn:concat($collection, "/", $doc-name)
    return fn:doc($doc-uri)
};
---|
123 | |
---|
124 | |
---|
(:
  Tell whether a result document is already available.

  $result-ref is appended directly to $collection (callers supply the
  leading "/"), followed by the ".xml" extension.
:)
declare function cmd-model:is-result-available($collection as xs:string, $result-ref as xs:string) as xs:boolean {
    let $result-uri := fn:concat($collection, $result-ref, $cmd-model:xmlExt)
    return fn:doc-available($result-uri)
};
---|
131 | |
---|
(:
  Tell whether the document "$collection/$doc-name" is available.
:)
declare function cmd-model:is-doc-available($collection as xs:string, $doc-name as xs:string) as xs:boolean {
    let $doc-uri := fn:concat($collection, "/", $doc-name)
    return fn:doc-available($doc-uri)
};
---|
138 | |
---|
(:
  Store the calculated frequencies for reuse and return the value of
  xdb:store.

  Logs in with the user and credential read from /db/clarin/writer.xml
  before storing.

  - More than one collection: the document goes to the common collection
    ($cmd-model:commonFreqsPath) under a compound name that includes an MD5
    hash of the collection names.
  - One collection: the document goes into that collection. Its name is
    built from the last "/"-separated step of $type-name, which is the name
    get-result-doc reads back. Previously the full $type-name was used, so
    a path such as "MDGroup/Actors/Actor" was stored under a different name
    than the one looked up.

  $depth is converted to xs:string once, because make-doc-name,
  make-compound-doc-name and make-doc-element-of-type all declare their
  depth parameter as xs:string. The multi-collection branch used to pass
  the xs:integer directly, which is a type error.
:)
declare function cmd-model:store-result($coll-names as xs:string+, $entries as element()*, $type-name as xs:string, $depth as xs:integer) as xs:string {
    let $clarin-writer := fn:doc("/db/clarin/writer.xml"),
        $dummy := xdb:login($cmd-model:cmdiDatabaseURI, $clarin-writer//write-user/text(), $clarin-writer//write-user-cred/text()),
        $depth-string := xs:string($depth)
    return
        if (fn:exists($coll-names[2])) then
            (: More than one collection is involved. :)
            xdb:store(
                $cmd-model:commonFreqsPath,
                cmd-model:make-compound-doc-name($coll-names, $type-name, $depth-string),
                cmd-model:make-doc-element-of-type($type-name, $coll-names, $entries, $depth-string)
            )
        else
            (: Only a single collection is involved. :)
            let $dummy := util:log('debug', fn:concat('Stores ', $type-name, ' in ', $coll-names))
            (: Same name derivation as get-result-doc, so the stored document can be found again. :)
            let $name-last := text:groups($type-name, "/(\w+)$")[last()]
            let $doc-base := if (fn:empty($name-last)) then $type-name else $name-last
            return
                xdb:store(
                    $coll-names,
                    cmd-model:make-doc-name($coll-names, $doc-base, $depth-string, fn:false()),
                    cmd-model:make-doc-element-of-type($type-name, (), $entries, $depth-string)
                )
};
---|
157 | |
---|
(:
  Store the collection listing $data in the given collection under the name
  $cmd-model:collectionDocName and return the value of xdb:store.

  Logs in with the user and credential read from /db/clarin/writer.xml
  first. Errors raised by xdb:store are not caught.
:)
declare function cmd-model:store-collection-data($data as node(), $collection-path as xs:string) as xs:string? {
    let $writer-config := fn:doc("/db/clarin/writer.xml")
    let $user := $writer-config//write-user/text()
    let $credential := $writer-config//write-user-cred/text()
    (: Bound only for its side effect; the login result is not inspected. :)
    let $logged-in := xdb:login($cmd-model:cmdiDatabaseURI, $user, $credential)
    return
        xdb:store($collection-path, $cmd-model:collectionDocName, $data)
};
---|
167 | |
---|
(:
  Create the document name "<type-name><depth>.xml", prefixed with
  "<coll-name>/" when $incl-path is true.
:)
declare function cmd-model:make-doc-name($coll-name as xs:string?, $type-name as xs:string, $depth as xs:string, $incl-path as xs:boolean) as xs:string {
    if ($incl-path) then
        fn:concat($coll-name, "/", $type-name, $depth, $cmd-model:xmlExt)
    else
        fn:concat($type-name, $depth, $cmd-model:xmlExt)
};
---|
179 | |
---|
(:
  Create a reusable document name for a set of collections:
  "<type-name><depth>-<md5>.xml", where <md5> is the MD5 hash of the
  collection names joined without a separator.
:)
declare function cmd-model:make-compound-doc-name($coll-names as xs:string+, $type-name as xs:string, $depth as xs:string) as xs:string {
    let $collections-hash := util:hash(string-join($coll-names, ""), "MD5")
    return
        fn:concat($type-name, $depth, "-", $collections-hash, $cmd-model:xmlExt)
};
---|
188 | |
---|
(:
  Create an element of the given type.

  $type-name        - name of the element to construct
  $count            - value of the count attribute
  $text-count       - value of the text-count attribute
  $text-types-count - value of the text-types-count attribute
  $value            - text content of the element

  The attributes are filled from the function's own parameters. The body
  previously referred to the undeclared variables $freq, $rank and
  $text-types, which is a static error.
:)
declare function cmd-model:make-element-of-type($type-name as xs:string, $count as xs:string, $text-count as xs:string, $text-types-count as xs:string, $value as xs:string) as element() {
    element {$type-name} {
        attribute count {$count},
        attribute text-count {$text-count},
        attribute text-types-count {$text-types-count},
        text {$value}
    }
};
---|
201 | |
---|
(:
  Create a document element of the given type.

  The element name comes from get-doc-type-element-name($type-name). It
  carries a depth attribute, a colls attribute with the comma-separated
  collection names (only when $coll-names is non-empty), a created attribute
  holding the current date and time, and $entries as its content.
:)
declare function cmd-model:make-doc-element-of-type($type-name as xs:string, $coll-names as xs:string*, $entries as element()*, $depth as xs:string) as element() {
    element {cmd-model:get-doc-type-element-name($type-name)} {
        attribute depth {$depth},
        (
            if (fn:exists($coll-names)) then
                attribute colls {fn:string-join($coll-names, ",")}
            else
                ()
        ),
        attribute created {fn:current-dateTime()},
        $entries
    }
};
---|
216 | |
---|
(:
  Return the element name used for result documents.

  $type-name is currently ignored: the name is always
  $cmd-model:docTypeTerms.
:)
declare function cmd-model:get-doc-type-element-name($type-name as xs:string) as xs:string {
    let $element-name := $cmd-model:docTypeTerms
    return $element-name
};
---|
223 | |
---|
224 | |
---|
225 | |
---|
226 | |
---|
(:
  Select the serialization format for a response.

  For the JSON format the exist:serialize option is set to text output with
  media type application/json. The item itself is returned unchanged in
  every case; no XML-to-JSON conversion is performed here.
:)
declare function cmd-model:serialise-as($item as node()?, $format as xs:string) as item()? {
    let $wants-json := $format eq $cmd-model:responseFormatJSon
    (: Bound only for its side effect on the serialization options. :)
    let $option :=
        if ($wants-json) then
            util:declare-option("exist:serialize", "method=text media-type=application/json")
        else
            (: XML and text formats need no extra option. :)
            ()
    return
        $item
};
---|
238 | |
---|
239 | |
---|
(:~
  API function queryModel.

  Fetches (creating it if necessary) the result document for
  $cmd-index-path in $collection at depth $max-depth and hands it to
  serialise-as with the requested $format.

  NOTE(review): $collection is declared xs:string+, but get-result-doc
  accepts a single xs:string - confirm that only one collection is passed.
:)
declare function cmd-model:query-model($cmd-index-path as xs:string, $collection as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
    let $result-doc := cmd-model:get-result-doc($collection, $cmd-index-path, $max-depth)
    return
        cmd-model:serialise-as($result-doc, $format)
};
---|
246 | |
---|
(:~
  API function getCollections.

  Returns the stored collection listing for $collections, serialised per
  $format. When no listing document exists yet, one is built with
  recurse-collections for every given collection, wrapped in a
  <Collections> element (count and root attributes) and stored first.
:)
declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
    (: fixme! - collections, is-doc-available only takes one collection. :)
    let $listing-name := $cmd-model:collectionDocName
    (: Bound only for its side effect: the listing is generated when missing. :)
    let $ensure-listing :=
        if (fn:not(cmd-model:is-doc-available($collections, $listing-name))) then
            let $entries :=
                (
                    for $root in $collections
                    return cmd-model:recurse-collections($root, $max-depth)
                )
            let $listing := <Collections count="{count($entries)}" root="{$collections}">{$entries}</Collections>
            return
                cmd-model:store-collection-data($listing, $collections)
        else
            ()
    let $stored-listing := cmd-model:get-doc($collections, $listing-name)
    return
        cmd-model:serialise-as($stored-listing, $format)
};
---|
263 | |
---|
(:~
  API function searchRetrieve.

  Evaluates $cql-query (an XPath fragment appended to the collection
  expression) against $collection, takes the ancestor CMD elements of the
  hits and returns a <searchRetrieveResponse> with:
    numberOfRecords             - total number of CMD records found
    echoedSearchRetrieveRequest - query, collection, start and end item
    diagnostics                 - number of records in the returned page
    records                     - the records from position $start-item up
                                  to and including position $end-item

  The page is cut with a length of $end-item - $start-item + 1. The third
  argument of fn:subsequence is a length, not an end position, so passing
  $end-item directly returned too many records whenever $start-item was
  greater than 1.

  NOTE(review): the query is built by string concatenation from
  caller-supplied values and run through util:eval; anything that can reach
  this function can execute arbitrary XQuery. Restrict or validate the input
  upstream.
:)
declare function cmd-model:search-retrieve($cql-query as xs:string, $collection as xs:string+, $format as xs:string, $start-item as xs:integer, $end-item as xs:integer) as item()* {
    let $results := util:eval(fn:concat("collection('", xdb:decode($collection), "')", xdb:decode($cql-query), "/ancestor::CMD")),
        $result-count := fn:count($results),
        (: An end position before the start yields a non-positive length and thus an empty page. :)
        $page-length := $end-item - $start-item + 1,
        $result-seq := fn:subsequence($results, $start-item, $page-length),
        $seq-count := fn:count($result-seq),
        $result-fragment :=
            <searchRetrieveResponse>
                <numberOfRecords>{$result-count}</numberOfRecords>
                <echoedSearchRetrieveRequest>{$cql-query, $collection, $start-item, $end-item}</echoedSearchRetrieveRequest>
                <diagnostics>{$seq-count}</diagnostics>
                <records>
                    {$result-seq}
                </records>
            </searchRetrieveResponse>
    return
        cmd-model:serialise-as($result-fragment, $format)
};
---|
286 | (: |
---|
287 | {cmdComponent} //{cmdComponent} Actor //Actor |
---|
288 | {cmdPath}. //{cmdPath}/{cmdComponent} Actor.Contact.Phone //Actor/Contact/Phone |
---|
289 | {cmdIndex} {rel} {term} //{cmdIndex}[\. {rel} '{term}'] Actors.Actor.Sex=f //Actors/Actor/Sex[.='f'] |
---|
290 | {cmdIndex} any {term} //{cmdIndex}[contains(., '{term}')] Organisation.Name any University //Organisation/Name[contains(.,'University')] |
---|
291 | and, or, and not ?! Organisation.Name any University and Actor.gender=m ?! |
---|
292 | |
---|
293 | //MDGroup[Actors/Actor/Role[.='sponsor'] and Actors/Actor/Name[contains(.,'a')]] |
---|
294 | //Title[starts-with(.,'a')] |
---|
295 | //Title[starts-with(.,'A')] |
---|
296 | //Title[contains(.,'analysis')] |
---|
297 | http://demo.spraakdata.gu.se/clarin/cmd/model/stats?operation=searchRetrieve&query=//Title[contains(.,'analysis')]&collection= |
---|
298 | |
---|
299 | <record> |
---|
300 | <recordSchema>info:srw/schema/1/dc-v1.1</recordSchema> |
---|
301 | <recordPacking>xml</recordPacking> |
---|
302 | <recordData> |
---|
303 | <srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-v1.1"> |
---|
304 | <dc:title>This is a Sample Record</dc:title> |
---|
305 | </srw_dc:dc> |
---|
306 | </recordData> |
---|
307 | <recordPosition>1</recordPosition> |
---|
308 | <extraRecordData> |
---|
309 | <rel:score xmlns:rel="info:srw/extensions/2/rel-1.0"> |
---|
310 | 0.965 |
---|
311 | </rel:score> |
---|
312 | </extraRecordData> |
---|
313 | </record> |
---|
314 | |
---|
315 | <searchRetrieveResponse> |
---|
316 | <numberOfRecords>integer</numberOfRecords> |
---|
317 | <echoedSearchRetrieveRequest>query itself (together with the context-collection) </echoedSearchRetrieveRequest> |
---|
318 | <diagnostics>if necessary</diagnostics> |
---|
319 | <records> |
---|
320 | .... |
---|
321 | </records> |
---|
322 | </searchRetrieveResponse> |
---|
323 | |
---|
324 | :) |
---|