import module namespace kwic="http://exist-db.org/xquery/kwic" at "xmldb:exist:///db/modules/kwic.xql";

(:~
 : Callback handed to kwic:summarize() for every text node that surrounds a hit.
 :
 : Text inside SPEAKER or STAGEDIR elements is dropped from the context;
 : all other text is padded with a single space so that adjacent nodes do not
 : run together when they are concatenated.
 :
 : @param $node the text node being added to the context
 : @param $mode 'before' when the node precedes the hit; any other value is
 :              treated as "after the hit"
 : @return the padded text, or the empty sequence to skip the node
 :)
declare function local:filter($node as node(), $mode as xs:string) as xs:string? {
    if ($node/parent::SPEAKER or $node/parent::STAGEDIR) then
        ()
    else if ($mode eq 'before') then
        concat($node, ' ')
    else
        concat(' ', $node)
};

(: context scope: number of preceding / following words :)
let $scope := 5

(: determine context width for KWIC results:
   10 characters per context word, minimally 40 :)
let $cutoff := max(($scope * 10, xs:int(40)))

(: NOTE(review): the element constructor was missing here; reconstructed as the
   <config width="..."/> element that the KWIC module reads - confirm against
   the installed kwic.xql :)
let $config := <config width="{$cutoff}"/>

let $hits := doc("/db/shakespeare/plays/hamlet.xml")//SPEECH[ft:query(., "nature king sir")]
let $KWIC := $hits/kwic:summarize(., $config, util:function(xs:QName("local:filter"), 2))

(: split up collocations per search term :)
for $term in distinct-values($KWIC//kwic:hit/lower-case(normalize-space(.)))
order by $term
return
    (: all KWIC summaries whose hit is the current term.
       The stray ")" that made this predicate unparsable has been removed, and
       "=" is used so a summary carrying more than one hit does not raise a
       type error. :)
    let $matches := $KWIC[kwic:hit/lower-case(normalize-space(.)) = $term]

    (: prepare entire left / right contexts for tokenization:
        - lower case
        - normalize whitespace
        - reverse the left context, so that position 1 is the word closest
          to the hit :)
    let $prev-strings :=
        for $a in $matches/kwic:prev/lower-case(normalize-space(.))
        return string-join(reverse(tokenize($a, '\W+')), ' ')
    let $next-strings := $matches//kwic:next/lower-case(normalize-space(.))

    (: per context position, retrieve all distinct words :)
    (: note: discard numbers :)
    (: NOTE(review): the wrapper element name "context" is a reconstruction;
       only its @pos attribute and its <w> children are referenced below :)
    let $prev-columns :=
        for $context in ((0 - $scope) to -1)
        let $words :=
            let $tok :=
                for $a in $prev-strings
                return tokenize($a, '\W+')[matches(., '[a-zA-Z]')][position() = abs($context)]
            for $b in distinct-values($tok)
            order by $b
            return <w>{$b}</w>
        return <context pos="{$context}">{$words}</context>
    let $next-columns :=
        for $context in (1 to $scope)
        let $words :=
            let $tok :=
                for $a in $next-strings
                return tokenize($a, '\W+')[matches(., '[a-zA-Z]')][$context]
            for $b in distinct-values($tok)
            order by $b
            return <w>{$b}</w>
        return <context pos="{$context}">{$words}</context>

    (: the longest word list determines the number of table rows;
       empty when there is no context at all, in which case no rows are emitted :)
    let $max := max(($next-columns | $prev-columns)/count(w))

    (: spread out words-per-context over table rows :)
    return
        <table>
            <tr>{
                (: empty corner cell above the row-number column :)
                <th/>,
                (for $a in $prev-columns return <th>{$a/@pos/string()}</th>),
                <th>term</th>,
                (for $a in $next-columns return <th>{$a/@pos/string()}</th>)
            }</tr>
            {
                for $i in (1 to $max)
                return
                    <tr>{
                        <td>{$i}</td>,
                        (for $a in $prev-columns return <td>{$a/w[$i]/text()}</td>),
                        <td>{$term}</td>,
                        (for $a in $next-columns return <td>{$a/w[$i]/text()}</td>)
                    }</tr>
            }
        </table>