Collating two sequences

Batch processing often leads to the need to merge two sequences of items based on a common key. For example you may need to selectively update a stored sequence to keep in line with an external source, inserting new items, deleting missing items and updating items which occur on both list. In these examples we merely generate an annotated merged squence. In use, the action on match, new and delete would depend on the application.
See Wikibook for some early work and a blog item on detecting missing values.

lookup

Dan 2012-02-24

Loop through one list , lookup on the other. It is not possible to detect items in target which are not in source in this algorithm.

XQuery

declare function local:merge($source,$target) {
<merge>{
  for $item in $source
  let $item-t := $target[@id = $item/@id]
  return
    if ($item-t)
    then element match {$item,$item-t}
    else element new {$item}
}</merge>
};

let $list1 :=
<list>
   <item id="a">A</item>
   <item id="b">B</item>
   <item id="d">D</item>
</list>
 
let $list2 :=
<list>
   <item id="a">a</item>
   <item id="b">b</item>
   <item id="c">c</item>
   <item id="e">e</item>
</list>
  
return
  local:merge($list1/*,$list2/*)


Result

<merge>
    <match>
        <item id="a">A</item>
        <item id="a">a</item>
    </match>
    <match>
        <item id="b">B</item>
        <item id="b">b</item>
    </match>
    <new>
        <item id="d">D</item>
    </new>
</merge>

Recursive Merge

Chris 2012-02-24

A recursive merge compares the keys of the first items in each list to decide if there is a match or miss in either sequence. The function then recusively merges the remainder of the sequence. This assumes that the sequences are in key order. Care is required over the cases where either or both lists are exhausted.

XQuery

declare function local:merge($source, $target )  {
    if (empty($source) and empty($target))
    then ()
    else if (empty ($target) or $source[1]/@id lt $target[1]/@id)
    then (element new {$source[1]}, local:merge(subsequence($source, 2), $target))
    else  if (empty($source) or $source[1]/@id gt $target[1]/@id)
    then  (element delete {$target[1]},local:merge($source, subsequence($target,2)))           
    else  (element match {$source[1], $target[1]},  local:merge(subsequence($source,2), subsequence($target,2)))
   };


let $list1 :=
<list>
   <item id="a">A</item>
   <item id="b">B</item>
   <item id="d">D</item>
</list>
 
let $list2 :=
<list>
   <item id="a">a</item>
   <item id="b">b</item>
   <item id="c">c</item>
   <item id="e">e</item>
</list>
  
return
  element merge {local:merge($list1/*,$list2/*)}
 

Result

<merge>
    <match>
        <item id="a">A</item>
        <item id="a">a</item>
    </match>
    <match>
        <item id="b">B</item>
        <item id="b">b</item>
    </match>
    <delete>
        <item id="c">c</item>
    </delete>
    <new>
        <item id="d">D</item>
    </new>
    <delete>
        <item id="e">e</item>
    </delete>
</merge>

Terminal Merge

Chris 2012-02-24

The recursive merge might be faster if both sequences were teminated with high-value keys so empty conditions dont need to be tested. This was a typical device used in batch processing. However the performance is actually worse not better.

XQuery

declare function local:merge($source,$target) {
  let $lastkey := "zzzzzzzzzzzz"
  let $last := element last { attribute id {$lastkey}}
  return local:merge(($source,$last), ($target,$last),$lastkey)
};
declare function local:merge($source, $target,$lastkey)  {
    if ($source[1]/@id = $target[1]/@id)
    then 
        if ($source[1]/@id = $lastkey)
        then ()
        else (element match {$source[1], $target[1]},  local:merge(subsequence($source,2), subsequence($target,2)))
    else if ($source[1]/@id lt $target[1]/@id)
    then (element new {$source[1]}, local:merge(subsequence($source, 2), $target))
    else (element delete {$target[1]},local:merge($source, subsequence($target,2)))           
     };


let $list1 :=
<list>
   <item id="a">A</item>
   <item id="b">B</item>
   <item id="d">D</item>
</list>
 
let $list2 :=
<list>
   <item id="a">a</item>
   <item id="b">b</item>
   <item id="c">c</item>
   <item id="e">e</item>
</list>
  
return
  element merge {local:merge($list1/*,$list2/*)}
 

Result

<merge>
    <match>
        <item id="a">A</item>
        <item id="a">a</item>
    </match>
    <match>
        <item id="b">B</item>
        <item id="b">b</item>
    </match>
    <delete>
        <item id="c">c</item>
    </delete>
    <new>
        <item id="d">D</item>
    </new>
    <delete>
        <item id="e">e</item>
    </delete>
</merge>

Key Merge

Chris 2012-02-24

Key-based merge iterates through the full set of keys in both source and target sequences using distinct-values, indexing into both sequences to see if the key is present on both or only one. The sequences dont have to be in key order. If the output is required in key order, the keys can be sorted first.

XQuery

declare function local:key-merge($source,$target) {
<merge>{
  for $key in distinct-values(($source/@id,$target/@id) ) 
  let $s := $source[@id = $key]
  let $t := $target[@id = $key]
  return
    if ($s and $t)         (: source and target :)
    then element match {$s,$t}
    else if ($s)            (: source only :)
    then element new {$s}
    else                    (: target only :)
         element delete{$t}
  
}</merge>
};

let $list1 :=
<list>
   <item id="a">A</item>
   <item id="b">B</item>
   <item id="d">D</item>
</list>
 
let $list2 :=
<list>
   <item id="a">a</item>
   <item id="b">b</item>
   <item id="c">c</item>
   <item id="e">e</item>
</list>
  
return
  local:key-merge($list1/*,$list2/*)

Result

<merge>
    <match>
        <item id="a">A</item>
        <item id="a">a</item>
    </match>
    <match>
        <item id="b">B</item>
        <item id="b">b</item>
    </match>
    <new>
        <item id="d">D</item>
    </new>
    <delete>
        <item id="c">c</item>
    </delete>
    <delete>
        <item id="e">e</item>
    </delete>
</merge>

Sorted Key Merge

Chris 2012-02-24

Key merge with sorted keys

XQuery

declare function local:key-merge($source,$target) {
<merge>{
  for $key in 
       for $key in distinct-values(($source/@id,$target/@id) ) order by $key return $key
  let $s := $source[@id = $key]
  let $t := $target[@id = $key]
  return
    if ($s and $t)         (: source and target :)
    then element match {$s,$t}
    else if ($s)            (: source only :)
    then element new {$s}
    else                    (: target only :)
         element delete{$t}
  
}</merge>
};

let $list1 :=
<list>
   <item id="a">A</item>
   <item id="b">B</item>
   <item id="d">D</item>
</list>
 
let $list2 :=
<list>
   <item id="a">a</item>
   <item id="b">b</item>
   <item id="c">c</item>
   <item id="e">e</item>
</list>
  
return
  local:key-merge($list1/*,$list2/*)

Result

<merge>
    <match>
        <item id="a">A</item>
        <item id="a">a</item>
    </match>
    <match>
        <item id="b">B</item>
        <item id="b">b</item>
    </match>
    <delete>
        <item id="c">c</item>
    </delete>
    <new>
        <item id="d">D</item>
    </new>
    <delete>
        <item id="e">e</item>
    </delete>
</merge>