Ignore:
Timestamp:
01/09/11 20:28:15 (13 years ago)
Author:
vronk
Message:

Mainly added a new API function: scanIndex
(group.xsl is used for aggregating the values)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • MDRepository/trunk/xquery/cmd-model.xqm

    r959 r1045  
    1212declare variable $cmd-model:cmdiMirrorPath as xs:string := "/db/cmdi-mirror";
    1313declare variable $cmd-model:cachePath as xs:string := "/db/cache";
    14 
     14declare variable $cmd-model:groupXsl := doc('/db/clarin/group.xsl');
    1515declare variable $cmd-model:getCollections as xs:string := "getCollections";
    1616declare variable $cmd-model:queryModel as xs:string := "queryModel";
     17declare variable $cmd-model:scanIndex as xs:string := "scanIndex";
    1718declare variable $cmd-model:searchRetrieve as xs:string := "searchRetrieve";
    1819
     
    2829declare variable $cmd-model:responseFormatText as xs:string := "text";
    2930
     31declare variable $cmd-model:scanSortText as xs:string := "text";
     32declare variable $cmd-model:scanSortSize as xs:string := "size";
     33
    3034declare variable $cmd-model:collectionDocName as xs:string := "collection.xml";
    3135
     
    3438declare variable $cmd-model:xmlExt as xs:string := ".xml";
    3539
     40declare variable $cmd-model:maxDepth as xs:integer := 8;
    3641declare variable $cmd-model:valuesLimit as xs:integer := 100;
    3742
     43
     44(:~
     45  API function getCollections.
     46:)
     47declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
     48  let $name := cmd-model:gen-cache-id("collection", $collections, xs:string($max-depth)),
     49    $doc :=
     50    if (cmd-model:is-in-cache($name)) then
     51       cmd-model:get-from-cache($name)
     52    else
     53      let $data := cmd-model:colls($collections, $max-depth)
     54        return cmd-model:store-in-cache($name, $data)
     55  return
     56    cmd-model:serialise-as($doc, $format)
     57};
    3858
    3959(:~
     
    5474
    5575(:~
    56   API function getCollections.
    57 :)
    58 declare function cmd-model:get-collections($collections as xs:string+, $format as xs:string, $max-depth as xs:integer) as item() {
    59   let $name := cmd-model:gen-cache-id("collection", $collections, xs:string($max-depth)),
    60     $doc :=
    61     if (cmd-model:is-in-cache($name)) then
    62        cmd-model:get-from-cache($name)
    63     else
    64       let $data := cmd-model:colls($collections, $max-depth)
    65         return cmd-model:store-in-cache($name, $data)
     76  API function scanIndex.
     77Works in two phases:
     78        1. create the full index for the given path/element (and cache it)
     79        2. select the requested subsequence (on subsequent calls, only this second step is performed)
     80:)
     81declare function cmd-model:scan-index($q as xs:string, $collection as xs:string+, $format as xs:string, $start-item as xs:integer, $max-items as xs:integer, $p-sort as xs:string?) as item()? {
     82
     83  let $qa := tokenize($q,'='),
     84         $cmd-index-path := $qa[1],
     85         $filter := ($qa[2],'')[1],
     86         $sort := if ($p-sort eq $cmd-model:scanSortText or $p-sort eq $cmd-model:scanSortSize) then $p-sort else $cmd-model:scanSortText,
     87          $name := cmd-model:gen-cache-id("index", ($collection, $cmd-index-path),"1"),
     88    (: skip cache $doc := cmd-model:values($cmd-index-path, $collection) :)
     89    $doc := if (cmd-model:is-in-cache($name)) then
     90      cmd-model:get-from-cache($name)
     91    else 
     92      let  $data := cmd-model:values($cmd-index-path, $collection)
     93        return cmd-model:store-in-cache($name, $data)
     94
     95        (: extract the required subsequence (according to given sort) :)
     96        let $res-term := transform:transform($doc,$cmd-model:groupXsl,
     97                        <parameters><param name="mode" value="subsequence"/>
     98                                                <param name="sort" value="{$sort}"/>
     99                                                <param name="filter" value="{$filter}"/>
     100                                                <param name="start-item" value="{$start-item}"/>
     101                                                <param name="max-items" value="{$max-items}"/>
     102                        </parameters>),
     103                $count-items := count($res-term/v),
     104                $colls := if (fn:empty($collection)) then '' else fn:string-join($collection, ","),
     105                $created := fn:current-dateTime(),
     106                $scan-clause := concat($cmd-index-path, '=', $filter),
     107                $res := <Terms colls="{$colls}" created="{$created}" count_items="{$count-items}"
     108                                        start-item="{$start-item}" max-items="{$max-items}" sort="{$sort}" scanClause="{$scan-clause}"  >{$res-term}</Terms>
     109
     110(:      let     $result-count := $doc/Term/@count,
     111    $result-seq := fn:subsequence($doc/Term/v, $start-item, $end-item),
     112        $result-frag := ($doc/Term, $result-seq),
     113    $seq-count := fn:count($result-seq) :)
     114
    66115  return
    67     cmd-model:serialise-as($doc, $format)
     116    cmd-model:serialise-as($res, $format)       
    68117};
    69118
     
    82131      for $coll in $collections return util:eval(fn:concat("$collection/ft:query(descendant::IsPartOf, <term>", xdb:decode($coll) ,"</term>)/ancestor-or-self::CMD", $sanitized-query))
    83132
    84     let $result-count := fn:count($results),
     133        let     $result-count := fn:count($results),
    85134    $result-seq := fn:subsequence($results, $start-item, $end-item),
    86135    $seq-count := fn:count($result-seq),
    87     $end-time := util:system-dateTime(),
    88     $result-fragment :=
     136        $end-time := util:system-dateTime()
     137
     138        let $summary-fragment :=
     139                if (contains($format,'withSummary')) then
     140                        let $used-profiles := for $profile in distinct-values($results//Components/concat(child::element()/name(),'##',../Header/MdProfile))
     141                                                        let $profile-id := substring-after($profile,'##'), $profile-name := substring-before($profile,'##')
     142                                                        return <profile id="{$profile-id}" name="{$profile-name}" count="{count($results//Components[concat(child::element()/name(),'##',../Header/MdProfile) eq $profile])}" />,
     143                                $end-time2 := util:system-dateTime(),
     144                                $result-summary := cmd-model:elem-r($result-seq//Components, "Components", $cmd-model:maxDepth, $cmd-model:maxDepth),
     145                        $end-time3 := util:system-dateTime(),
     146                                $duration :=  concat(($end-time - $start-time),", ", ($end-time2 - $start-time),", ", ($end-time3 - $start-time))
     147                        return (<duration>{$duration}</duration>, <usedProfiles>{$used-profiles}</usedProfiles>,<resultSummary>{$result-summary}</resultSummary>)
     148                else <duration>{$end-time - $start-time}</duration>
     149               
     150    let $result-fragment :=
    89151    <searchRetrieveResponse>
    90152      <numberOfRecords>{$result-count}</numberOfRecords>
     
    92154      <extraResponseData>
    93155        <returnedRecords>{$seq-count}</returnedRecords>
    94         <duration>{$end-time - $start-time}</duration>
     156                {$summary-fragment}
    95157      </extraResponseData>
    96158      <records>
     
    106168(:
    107169  **********************
    108   queryModel - subfunctions
     170  queryModel, scanIndex - subfunctions
    109171:)
    110172
     
    154216            (for $elname in $subs[. != '']
    155217            return
    156               cmd-model:elem-r(util:eval(concat("$path-nodes/", $elname)), concat($path, '/', $elname), $max-depth, $depth - 1),
    157               if ($max-depth eq 1 and $text-count gt 0) then cmd-model:values($path-nodes) else ())
     218              cmd-model:elem-r(util:eval(concat("$path-nodes/", $elname)), concat($path, '/', $elname), $max-depth, $depth - 1)
     219                        (: values moved to its own function: scanIndex
     220                      if ($max-depth eq 1 and $text-count gt 0) then cmd-model:values($path-nodes) else ()) :)
     221                                                )
    158222          else 'maxdepth'
    159223        }</Term>
    160 };
    161 
    162 declare function cmd-model:values($nodes as node()*) as node()* {
    163 let $keys := distinct-values($nodes/text())
    164 let $values := for $key at $pos in $keys
    165   let $kcount := count($nodes[. eq $key])
    166     order by lower-case($key) ascending
    167     return <v key="{$key}" cnt="{$kcount}" />
    168 return
    169   if ($cmd-model:valuesLimit eq 0) then $values
    170   else
    171   subsequence($values, 1, $cmd-model:valuesLimit)
    172224};
    173225
     
    178230        return util:node-xpath($anc)
    179231        }</Term>
     232};
     233
     234declare function cmd-model:collect-nodes($collections as xs:string+, $path as xs:string) as element()* {
     235  let $collection := collection($cmd-model:cmdiMirrorPath),
     236    $path-nodes :=
     237    if ($collections[1] eq $cmd-model:collectionRoot) then
     238      util:eval(fn:concat("$collection/descendant-or-self::", $path))
     239    else
     240      for $coll in $collections
     241      return
     242        util:eval(fn:concat("$collection/ft:query(descendant::IsPartOf, <query><term>", xdb:decode($coll), "</term></query>)/ancestor-or-self::CMD/descendant-or-self::", $path))
     243   
     244        return $path-nodes
     245};
     246
     247declare function cmd-model:values($path as xs:string,$collections as xs:string+) as element() {
     248
     249        let $nodes := cmd-model:collect-nodes($collections, $path),
     250(:              $term := <Term path="{fn:concat("//", $path)}" name="{(text:groups($path, "/([^/]+)$")[last()],$path)[1] }" >{$nodes}</Term>
     251                @name is added in xslt:)
     252                $term := <Term path="{fn:concat("//", $path)}"  >{$nodes}</Term>
     253
     254        (: use XSLT 2.0 for-each-group functionality to aggregate the values of a node - much, much faster than XQuery :)
     255        return transform:transform($term,$cmd-model:groupXsl, ())
     256
    180257};
    181258
Note: See TracChangeset for help on using the changeset viewer.