(:~ KWIC module: formats query results to display keywords in context (KWIC). A configurable amount of text is displayed to the left and right of a matching keyword (or phrase). The module works with all indexes that support match highlighting (matches are tagged with an <exist:match> element). This includes the old full text index, the new Lucene-based full text index, as well as the NGram index. The kwic:summarize() function represents the main entry point into the module. To have more control over the text extraction context, you can also call kwic:get-summary() instead. For example, the following snippet will only print the first match within a given set of context nodes ($ancestor):
let $matches := kwic:get-matches($hit)
for $ancestor in $matches/ancestor::para | $matches/ancestor::title | $matches/ancestor::td
return
kwic:get-summary($ancestor, ($ancestor//exist:match)[1], $config)
:)
module namespace kwic="http://exist-db.org/xquery/kwic";
declare variable $kwic:CHARS_SUMMARY := 120;
declare variable $kwic:CHARS_KWIC := 40;
(:~
Retrieve the following and preceding text chunks for a given match.
@param $match the text node containing the match
@param $mode the selection mode: either "previous" or "following"
:)
declare function kwic:get-context($root as element(), $match as element(exist:match), $mode as xs:string) as node()* {
let $sibs :=
if ($mode eq 'previous') then
$match/preceding::text()
else
$match/text()/following::text()
for $sib in $sibs[ancestor::* intersect $root]
return
if ($sib/parent::exist:match) then
{$sib}
else
$sib
};
(:~
Like fn:substring, but takes a node argument. If the node is an element,
a new element is created with the same node-name as the old one and the
shortened text content.
:)
declare function kwic:substring($node as item(), $start as xs:int, $count as xs:int) as item()? {
let $str := substring($node, $start, $count)
return
if ($node instance of element()) then
element { node-name($node) } { $str }
else
$str
};
declare function kwic:display-text($text as text()?) as node()? {
if ($text/parent::exist:match) then
{$text}
else
$text
};
declare function kwic:callback($callback as function?, $node as node(), $mode as xs:string) as xs:string? {
if (exists($callback)) then
util:call($callback, $node, $mode)
else
$node
};
(:~
Generate the left-hand context of the match. Returns a normalized string,
whose total string length is less than or equal to $width characters.
Note: this function calls itself recursively until $node is empty or
the returned sequence has the desired total string length.
:)
declare function kwic:truncate-previous($root as node()?, $node as node()?, $truncated as item()*,
$width as xs:int, $callback as function?) {
let $nextProbe := $node/preceding::text()[1]
let $next := if ($root[not(. intersect $nextProbe/ancestor::*)]) then () else $nextProbe
let $probe :=
if (exists($callback)) then
concat(for $a in $next return kwic:callback($callback, $nextProbe, "after"), $truncated)
else concat($nextProbe, ' ', $truncated)
return
if (string-length($probe) gt $width) then
let $norm := concat(normalize-space($probe), ' ')
return
if (string-length($norm) le $width and $next) then
kwic:truncate-previous($root, $next, $norm, $width, $callback)
else if ($next) then
concat('...', substring($norm, string-length($norm) - $width + 1))
else
$norm
else if ($next) then
kwic:truncate-previous($root, $next, $probe, $width, $callback)
else for $str in normalize-space($probe)[.] return concat($str, ' ')
};
(:~
Generate the left-hand context of the match. Returns a string which preserves the original whitespace,
whose total string length is less than or equal to $width characters.
Note: this function calls itself recursively until $node is empty or
the returned sequence has the desired total string length.
:)
declare function kwic:truncate-previous-ps($root as node()?, $node as node()?, $truncated as item()*,
$width as xs:int, $callback as function?) {
let $nextProbe := $node/preceding::text()[1]
let $next := if ($root[not(. intersect $nextProbe/ancestor::*)]) then () else $nextProbe
let $probe :=
if ($callback) then
concat(for $a in $next return kwic:callback($callback, $nextProbe, "after"), $truncated)
else concat($next, $truncated)
return
if (string-length($probe) gt $width) then
if ($next) then
concat('...', substring($probe, string-length($probe) - $width + 1))
else
$probe
else if ($next) then
kwic:truncate-previous-ps($root, $next, $probe, $width, $callback)
else $probe
};
(:~
Generate the right-hand context of the match. Returns a normalized string,
whose total string length is less than or equal to $width characters.
Note: this function calls itself recursively until $node is empty or
the returned sequence has the desired total string length.
:)
declare function kwic:truncate-following($root as node()?, $node as node()?, $truncated as item()*,
$width as xs:int, $callback as function?) {
let $nextProbe := $node/following::text()[1]
let $next := if ($root[not(. intersect $nextProbe/ancestor::*)]) then () else $nextProbe
let $probe :=
if (exists($callback)) then
concat($truncated, for $a in $next return kwic:callback($callback, $nextProbe, "after"))
else concat($truncated, ' ', $nextProbe)
return
if (string-length($probe) gt $width) then
let $norm := concat(' ', normalize-space($probe))
return
if (string-length($norm) le $width and $next) then
kwic:truncate-following($root, $next, $norm, $width, $callback)
else if ($next) then
concat(substring($norm, 1, $width), '...')
else
$norm
else if ($next) then
kwic:truncate-following($root, $next, $probe, $width, $callback)
else for $str in normalize-space($probe)[.] return concat(' ', $str)
};
(:~
Generate the right-hand context of the match. Returns a string which preserves the original whitespace,
whose total string length is less than or equal to $width characters.
Note: this function calls itself recursively until $node is empty or
the returned sequence has the desired total string length.
:)
declare function kwic:truncate-following-ps($root as node()?, $node as node()?, $truncated as item()*,
$width as xs:int, $callback as function?) {
let $nextProbe := $node/following::text()[1]
let $next := if ($root[not(. intersect $nextProbe/ancestor::*)]) then () else $nextProbe
let $probe :=
if ($callback) then
concat($truncated, for $a in $next return kwic:callback($callback, $nextProbe, "after"))
else concat($truncated, $nextProbe)
return
if (string-length($probe) gt $width) then
if ($next) then
concat(substring($probe, 1, $width), '...')
else
$probe
else if ($next) then
kwic:truncate-following-ps($root, $next, $probe, $width, $callback)
else $probe
};
(:~
Computes the total string length of the nodes in the argument sequence
:)
declare function kwic:string-length($nodes as item()*) as xs:integer {
if (exists($nodes)) then
sum(for $n in $nodes return string-length($n))
else
0
};
(:~
Print a summary of the match in $node. Output a predefined amount of text to
the left and the right of the match.
This 3-argument helper function (lacking the $callback argument) just passes
an empty $callback argument to the main 4-argument function kwic:get-summary().
@param $root (optional) root element which should be used as context for the match. It defines the
boundaries for the text extraction. Text will be taken from this context.
@param $node the exist:match element to process.
@param $config configuration element which determines the behaviour of the function
:)
declare function kwic:get-summary($root as node()?, $node as element(exist:match),
$config as element(config)) as element() {
kwic:get-summary($root, $node, $config, ())
};
(:~
Print a summary of the match in $node. Output a predefined amount of text to
the left and the right of the match.
@param $root (optional) root element which should be used as context for the match. It defines the
boundaries for the text extraction. Text will be taken from this context.
@param $node the exist:match element to process.
@param $config configuration element which determines the behaviour of the function
@param $callback (optional) reference to a callback function which will be called
once for every text node before it is appended to the displayed text. The function
should accept 2 parameters: 1) a single text node, 2) a string indicating the
current direction in which text is appended, i.e. "before" or "after". The function
may return the empty sequence if the current node should be ignore (e.g. if it belongs
to a "footnote" which should not be displayed). Otherwise it should return a single
string.
:)
declare function kwic:get-summary($root as node()?, $node as element(exist:match),
$config as element(config), $callback as function?) as element() {
let $width := xs:int($config/@width)
let $format := $config/@format
let $ps := $config/@preserve-space = ('yes', 'true')
let $prevTrunc := if ($ps) then kwic:truncate-previous-ps($root, $node, (), $width, $callback)
else kwic:truncate-previous($root, $node, (), $width, $callback)
let $followingTrunc := if ($ps) then kwic:truncate-following-ps($root, $node, (), $width, $callback)
else kwic:truncate-following($root, $node, (), $width, $callback)
return
if ($format eq 'p') then
{$prevTrunc} { if ($config/@link) then { $node/text() } else { $node/text() } } {$followingTrunc}
else if ($format eq 'table') then