libfossil: Artifact [12d8c37e18]

Artifact 12d8c37e18b6e54dc71b1d5fa4f12d52dad9e302:

File src/diff2.c — part of check-in [150bb5b77c] at 2021-10-06 21:23:40 on branch port-annotate — Initial port of annotate/praise op is working but final results differ considerably from fossil's, so leaving it in a branch for now. The selected range of versions match, but the end results do not. (user: stephan size: 55926)
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 
/* vim: set ts=2 et sw=2 tw=80: */
/*
  Copyright 2013-2021 The Libfossil Authors, see LICENSES/BSD-2-Clause.txt

  SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  SPDX-FileCopyrightText: 2021 The Libfossil Authors
  SPDX-ArtifactOfProjectName: Libfossil
  SPDX-FileType: Code

  Heavily indebted to the Fossil SCM project (https://fossil-scm.org).
*/
/**
   This file houses the "2nd generation" diff-generation APIs. This
   code is a straight port of those algorithms from the Fossil SCM
   project, initially implemented by D. Richard Hipp, ported and
   the license re-assigned to this project with this consent.
*/
#include "fossil-scm/fossil.h"
#include <assert.h>
#include <memory.h>
#include <stdlib.h>
#include <string.h> /* for memmove()/strlen() */

#include <stdio.h>
#define MARKER(pfexp)                                               \
  do{ printf("MARKER: %s:%d:%s():\t",__FILE__,__LINE__,__func__);   \
    printf pfexp;                                                   \
  } while(0)


const fsl_diff_opt fsl_diff_opt_empty = fsl_diff_opt_empty_m;
const fsl_diff_builder fsl_diff_builder_empty = fsl_diff_builder_empty_m;
const fsl_dline fsl_dline_empty = fsl_dline_empty_m;
const fsl_dline_change fsl_dline_change_empty = fsl_dline_change_empty_m;
const fsl_diff_cx fsl_diff_cx_empty = fsl_diff_cx_empty_m;

/* Porting crutch for to-be-static functions */

/*
** Maximum length of a line in a text file, in bytes.  (2**15 = 32768 bytes)
*/
#define LENGTH_MASK_SZ  15
#define LENGTH_MASK     ((1<<LENGTH_MASK_SZ)-1)

void fsl__diff_cx_clean(fsl_diff_cx * const cx){
  fsl_free(cx->aFrom);
  fsl_free(cx->aTo);
  fsl_free(cx->aEdit);
  cx->aFrom = cx->aTo = NULL;
  cx->aEdit = NULL;
  *cx = fsl_diff_cx_empty;
}

/**
   Counts the number of lines in the first n bytes of the given 
   string. If n<0 then fsl_strlen() is used to count it. 

   It includes the last line in the count even if it lacks the \n
   terminator. If an empty string is passed in, the number of lines
   is zero.

   For the purposes of this function, a string is considered empty if
   it contains no characters OR contains only NUL characters.

   If the input appears to be plain text it returns true and, if nOut
   is not NULL, writes the number of lines there. If the input appears
   to be binary, returns false and does not modify nOut.
*/
FSL_EXPORT bool fsl_count_lines(const char *z, fsl_int_t n, uint32_t * nOut );

bool fsl_count_lines(const char *z, fsl_int_t n, uint32_t * nOut ){
  uint32_t nLine;
  const char *zNL, *z2;
  if(n<0) n = (fsl_int_t)fsl_strlen(z);
  for(nLine=0, z2=z; (zNL = strchr(z2,'\n'))!=0; z2=zNL+1, nLine++){}
  if( z2[0]!='\0' ){
    nLine++;
    do{ z2++; }while( z2[0]!='\0' );
  }
  if( n!=(fsl_int_t)(z2-z) ) return false;
  if( nOut ) *nOut = nLine;
  return true;
}

int fsl_break_into_dlines(const char *z, fsl_int_t n,
                          uint32_t *pnLine,
                          fsl_dline **pOut, uint64_t diffFlags){
  uint32_t nLine, i, k, nn, s, x;
  uint64_t h, h2;
  fsl_dline *a = 0;
  const char *zNL;

  if( !fsl_count_lines(z, n, &nLine) ){
    *pOut = 0;
    return FSL_RC_DIFF_BINARY;
  }
  assert( nLine>0 || z[0]=='\0' );
  if(nLine>0){
    a = fsl_malloc( sizeof(a[0])*nLine );
    if(!a) return FSL_RC_OOM;
    memset(a, 0, sizeof(a[0])*nLine);
  }else{
    *pnLine = 0;
    *pOut = a;
    return 0;
  }
  assert( a );
  i = 0;
  do{
    zNL = strchr(z,'\n');
    if( zNL==0 ) zNL = z+n;
    nn = (uint32_t)(zNL - z);
    if( nn>LENGTH_MASK ){
      fsl_free(a);
      *pOut = 0;
      *pnLine = 0;
      return FSL_RC_DIFF_BINARY;
    }
    a[i].z = z;
    k = nn;
    if( diffFlags & FSL_DIFF2_STRIP_EOLCR ){
      if( k>0 && z[k-1]=='\r' ){ k--; }
    }
    a[i].n = k;
    s = 0;
    if( diffFlags & FSL_DIFF2_IGNORE_EOLWS ){
      while( k>0 && fsl_isspace(z[k-1]) ){ k--; }
    }
    if( (diffFlags & FSL_DIFF2_IGNORE_ALLWS)
        ==FSL_DIFF2_IGNORE_ALLWS ){
      uint32_t numws = 0;
      while( s<k && fsl_isspace(z[s]) ){ ++s; }
      for(h=0, x=s; x<k; ++x){
        char c = z[x];
        if( fsl_isspace(c) ){
          ++numws;
        }else{
          h = (h^c)*9000000000000000041LL;
        }
      }
      k -= numws;
    }else{
      uint32_t k2 = k & ~0x7;
      uint64_t m;
      for(h=0, x=s; x<k2; x += 8){
        memcpy(&m, z+x, 8);
        h = (h^m)*9000000000000000041LL;
      }
      m = 0;
      memcpy(&m, z+x, k-k2);
      h ^= m;
    }
    a[i].indent = s;
    a[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) | (k-s);
    h2 = h % nLine;
    a[i].iNext = a[h2].iHash;
    a[h2].iHash = i+1;
    z += nn+1; n -= nn+1;
    i++;
  }while( zNL[0]!='\0' && zNL[1]!='\0' );
  assert( i==nLine );

  *pnLine = nLine;
  *pOut = a;
  return 0;
}

int fsl_dline_cmp(const fsl_dline * const pA,
                         const fsl_dline * const pB){
  if( pA->h!=pB->h ) return 1;
  return memcmp(pA->z,pB->z, pA->h&LENGTH_MASK);
}

int fsl_dline_cmp_ignore_ws(const fsl_dline * const pA,
                               const fsl_dline * const pB){
  unsigned short a = pA->indent, b = pB->indent;
  if( pA->h==pB->h ){
    while( a<pA->n || b<pB->n ){
      if( a<pA->n && b<pB->n && pA->z[a++] != pB->z[b++] ) return 1;
      while( a<pA->n && fsl_isspace(pA->z[a])) ++a;
      while( b<pB->n && fsl_isspace(pB->z[b])) ++b;
    }
    return pA->n-a != pB->n-b;
  }
  return 1;
}

/*
** The two text segments zLeft and zRight are known to be different on
** both ends, but they might have  a common segment in the middle.  If
** they do not have a common segment, return 0.  If they do have a large
** common segment, return non-0 and before doing so set:
**
**   aLCS[0] = start of the common segment in zLeft
**   aLCS[1] = end of the common segment in zLeft
**   aLCS[2] = start of the common segment in zLeft
**   aLCS[3] = end of the common segment in zLeft
**
** This computation is for display purposes only and does not have to be
** optimal or exact.
*/
static int textLCS2(
  const char *zLeft,  uint32_t nA, /* String on the left */
  const char *zRight, uint32_t nB, /* String on the right */
  uint32_t *aLCS                   /* Identify bounds of LCS here */
){
  const unsigned char *zA = (const unsigned char*)zLeft;    /* left string */
  const unsigned char *zB = (const unsigned char*)zRight;   /* right string */
  uint32_t i, j, k;               /* Loop counters */
  uint32_t lenBest = 0;           /* Match length to beat */

  for(i=0; i<nA-lenBest; i++){
    unsigned char cA = zA[i];
    if( (cA&0xc0)==0x80 ) continue;
    for(j=0; j<nB-lenBest; j++ ){
      if( zB[j]==cA ){
        for(k=1; j+k<nB && i+k<nA && zB[j+k]==zA[i+k]; k++){}
        while( (zB[j+k]&0xc0)==0x80 ){ k--; }
        if( k>lenBest ){
          lenBest = k;
          aLCS[0] = i;
          aLCS[1] = i+k;
          aLCS[2] = j;
          aLCS[3] = j+k;
        }
      }
    }
  }
  return lenBest>0;
}

/*
** Find the smallest spans that are different between two text strings
** that are known to be different on both ends. Returns the number
** of entries in p->a which get populated.
*/
static unsigned short textLineChanges(
  const char *zLeft,  uint32_t nA, /* String on the left */
  const char *zRight, uint32_t nB, /* String on the right */
  fsl_dline_change * const p             /* Write results here */
){
  p->n = 1;
  p->a[0].iStart1 = 0;
  p->a[0].iLen1 = nA;
  p->a[0].iStart2 = 0;
  p->a[0].iLen2 = nB;
  p->a[0].isMin = 0;
  while( p->n<fsl_dline_change_max_spans-1 ){
    int mxi = -1;
    int mxLen = -1;
    int x, i;
    uint32_t aLCS[4];
    struct fsl_dline_change_span *a, *b;
    for(i=0; i<p->n; i++){
      if( p->a[i].isMin ) continue;
      x = p->a[i].iLen1;
      if( p->a[i].iLen2<x ) x = p->a[i].iLen2;
      if( x>mxLen ){
        mxLen = x;
        mxi = i;
      }
    }
    if( mxLen<6 ) break;
    x = textLCS2(zLeft + p->a[mxi].iStart1, p->a[mxi].iLen1,
                 zRight + p->a[mxi].iStart2, p->a[mxi].iLen2, aLCS);
    if( x==0 ){
      p->a[mxi].isMin = 1;
      continue;
    }
    a = p->a+mxi;
    b = a+1;
    if( mxi<p->n-1 ){
      memmove(b+1, b, sizeof(*b)*(p->n-mxi-1));
    }
    p->n++;
    b->iStart1 = a->iStart1 + aLCS[1];
    b->iLen1 = a->iLen1 - aLCS[1];
    a->iLen1 = aLCS[0];
    b->iStart2 = a->iStart2 + aLCS[3];
    b->iLen2 = a->iLen2 - aLCS[3];
    a->iLen2 = aLCS[2];
    b->isMin = 0;
  }
  return p->n;
}

/*
** Return true if the string starts with n spaces
*/
static int allSpaces(const char *z, int n){
  int i;
  for(i=0; i<n && fsl_isspace(z[i]); ++i){}
  return i==n;
}

/*
** Try to improve the human-readability of the fsl_dline_change p.
**
** (1)  If the first change span shows a change of indentation, try to
**      move that indentation change to the left margin.
**
** (2)  Try to shift changes so that they begin or end with a space.
*/
static void improveReadability(
  const char *zA,  /* Left line of the change */
  const char *zB,  /* Right line of the change */
  fsl_dline_change * const p /* The fsl_dline_change to be adjusted */
){
  int j, n, len;
  if( p->n<1 ) return;

  /* (1) Attempt to move indentation changes to the left margin */
  if( p->a[0].iLen1==0
   && (len = p->a[0].iLen2)>0
   && (j = p->a[0].iStart2)>0
   && zB[0]==zB[j]
   && allSpaces(zB, j)
  ){
    for(n=1; n<len && n<j && zB[j]==zB[j+n]; n++){}
    if( n<len ){
      memmove(&p->a[1], &p->a[0], sizeof(p->a[0])*p->n);
      p->n++;
      p->a[0] = p->a[1];
      p->a[1].iStart2 += n;
      p->a[1].iLen2 -= n;
      p->a[0].iLen2 = n;
    }
    p->a[0].iStart1 = 0;
    p->a[0].iStart2 = 0;
  }else
  if( p->a[0].iLen2==0
   && (len = p->a[0].iLen1)>0
   && (j = p->a[0].iStart1)>0
   && zA[0]==zA[j]
   && allSpaces(zA, j)
  ){
    for(n=1; n<len && n<j && zA[j]==zA[j+n]; n++){}
    if( n<len ){
      memmove(&p->a[1], &p->a[0], sizeof(p->a[0])*p->n);
      p->n++;
      p->a[0] = p->a[1];
      p->a[1].iStart1 += n;
      p->a[1].iLen1 -= n;
      p->a[0].iLen1 = n;
    }
    p->a[0].iStart1 = 0;
    p->a[0].iStart2 = 0;
  }

  /* (2) Try to shift changes so that they begin or end with a
  ** space.  (TBD) */
}

void fsl_dline_change_spans(const fsl_dline *pLeft, const fsl_dline *pRight,
                            fsl_dline_change * const p){
  /* fossil(1) counterpart ==> diff.c oneLineChange() */
  int nLeft;           /* Length of left line in bytes */
  int nRight;          /* Length of right line in bytes */
  int nShort;          /* Shortest of left and right */
  int nPrefix;         /* Length of common prefix */
  int nSuffix;         /* Length of common suffix */
  int nCommon;         /* Total byte length of suffix and prefix */
  const char *zLeft;   /* Text of the left line */
  const char *zRight;  /* Text of the right line */
  int nLeftDiff;       /* nLeft - nPrefix - nSuffix */
  int nRightDiff;      /* nRight - nPrefix - nSuffix */

  nLeft = pLeft->n;
  zLeft = pLeft->z;
  nRight = pRight->n;
  zRight = pRight->z;
  nShort = nLeft<nRight ? nLeft : nRight;

  nPrefix = 0;
  while( nPrefix<nShort && zLeft[nPrefix]==zRight[nPrefix] ){
    nPrefix++;
  }
  if( nPrefix<nShort ){
    while( nPrefix>0 && (zLeft[nPrefix]&0xc0)==0x80 ) nPrefix--;
  }
  nSuffix = 0;
  if( nPrefix<nShort ){
    while( nSuffix<nShort
           && zLeft[nLeft-nSuffix-1]==zRight[nRight-nSuffix-1] ){
      nSuffix++;
    }
    if( nSuffix<nShort ){
      while( nSuffix>0 && (zLeft[nLeft-nSuffix]&0xc0)==0x80 ) nSuffix--;
    }
    if( nSuffix==nLeft || nSuffix==nRight ) nPrefix = 0;
  }
  nCommon = nPrefix + nSuffix;

  /* If the prefix and suffix overlap, that means that we are dealing with
  ** a pure insertion or deletion of text that can have multiple alignments.
  ** Try to find an alignment to begins and ends on whitespace, or on
  ** punctuation, rather than in the middle of a name or number.
  */
  if( nCommon > nShort ){
    int iBest = -1;
    int iBestVal = -1;
    int i;
    int nLong = nLeft<nRight ? nRight : nLeft;
    int nGap = nLong - nShort;
    for(i=nShort-nSuffix; i<=nPrefix; i++){
       int iVal = 0;
       char c = zLeft[i];
       if( fsl_isspace(c) ){
         iVal += 5;
       }else if( !fsl_isalnum(c) ){
         iVal += 2;
       }
       c = zLeft[i+nGap-1];
       if( fsl_isspace(c) ){
         iVal += 5;
       }else if( !fsl_isalnum(c) ){
         iVal += 2;
       }
       if( iVal>iBestVal ){
         iBestVal = iVal;
         iBest = i;
       }
    }
    nPrefix = iBest;
    nSuffix = nShort - nPrefix;
    nCommon = nPrefix + nSuffix;
  }

  /* A single chunk of text inserted */
  if( nCommon==nLeft ){
    p->n = 1;
    p->a[0].iStart1 = nPrefix;
    p->a[0].iLen1 = 0;
    p->a[0].iStart2 = nPrefix;
    p->a[0].iLen2 = nRight - nCommon;
    improveReadability(zLeft, zRight, p);
    return;
  }

  /* A single chunk of text deleted */
  if( nCommon==nRight ){
    p->n = 1;
    p->a[0].iStart1 = nPrefix;
    p->a[0].iLen1 = nLeft - nCommon;
    p->a[0].iStart2 = nPrefix;
    p->a[0].iLen2 = 0;
    improveReadability(zLeft, zRight, p);
    return;
  }

  /* At this point we know that there is a chunk of text that has
  ** changed between the left and the right.  Check to see if there
  ** is a large unchanged section in the middle of that changed block.
  */
  nLeftDiff = nLeft - nCommon;
  nRightDiff = nRight - nCommon;
  if( nLeftDiff >= 4
   && nRightDiff >= 4
   && textLineChanges(&zLeft[nPrefix], nLeftDiff,
                      &zRight[nPrefix], nRightDiff, p)>1
  ){
    int i;
    for(i=0; i<p->n; i++){
      p->a[i].iStart1 += nPrefix;
      p->a[i].iStart2 += nPrefix;
    }
    improveReadability(zLeft, zRight, p);
    return;
  }

  /* If all else fails, show a single big change between left and right */
  p->n = 1;
  p->a[0].iStart1 = nPrefix;
  p->a[0].iLen1 = nLeft - nCommon;
  p->a[0].iStart2 = nPrefix;
  p->a[0].iLen2 = nRight - nCommon;
  improveReadability(zLeft, zRight, p);
}



/*
** The threshold at which diffBlockAlignment transitions from the
** O(N*N) Wagner minimum-edit-distance algorithm to a less process
** O(NlogN) divide-and-conquer approach.
*/
#define DIFF_ALIGN_MX  1225

/*
** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a
** pair of adjacent differences.  Return true if the gap between these
** two differences is so small that they should be rendered as a single
** edit.
*/
static int smallGap2(const int *R, int ma, int mb){
  int m = R[3];
  ma += R[4] + m;
  mb += R[5] + m;
  if( ma*mb>DIFF_ALIGN_MX ) return 0;
  return m<=2 || m<=(R[1]+R[2]+R[4]+R[5])/8;
}

static unsigned short diff_opt_context_lines(fsl_diff_opt const * cfg){
  const unsigned short dflt = 5;
  unsigned short n = cfg ? cfg->nContext : dflt;
  if( !n && (cfg->diffFlags & FSL_DIFF2_CONTEXT_ZERO)==0 ){
    n = dflt;
  }
  return n;
}

/*
** Minimum of two values
*/
static int minInt(int a, int b){ return a<b ? a : b; }

/****************************************************************************/
/*
** Return the number between 0 and 100 that is smaller the closer pA and
** pB match.  Return 0 for a perfect match.  Return 100 if pA and pB are
** completely different.
**
** The current algorithm is as follows:
**
** (1) Remove leading and trailing whitespace.
** (2) Truncate both strings to at most 250 characters
** (3) If the two strings have a common prefix, measure that prefix
** (4) Find the length of the longest common subsequence that is
**     at least 150% longer than the common prefix.
** (5) Longer common subsequences yield lower scores.
*/
static int match_dline2(const fsl_dline *pA, const fsl_dline *pB){
  const char *zA;            /* Left string */
  const char *zB;            /* right string */
  int nA;                    /* Bytes in zA[] */
  int nB;                    /* Bytes in zB[] */
  int nMin;
  int nPrefix;
  int avg;                   /* Average length of A and B */
  int i, j, k;               /* Loop counters */
  int best = 0;              /* Longest match found so far */
  int score;                 /* Final score.  0..100 */
  unsigned char c;           /* Character being examined */
  unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */
  unsigned char aNext[252];  /* aNext[i] = index in zB[] of next zB[i] char */

  zA = pA->z;
  zB = pB->z;
  nA = pA->n;
  nB = pB->n;
  while( nA>0 && (unsigned char)zA[0]<=' ' ){ nA--; zA++; }
  while( nA>0 && (unsigned char)zA[nA-1]<=' ' ){ nA--; }
  while( nB>0 && (unsigned char)zB[0]<=' ' ){ nB--; zB++; }
  while( nB>0 && (unsigned char)zB[nB-1]<=' ' ){ nB--; }
  if( nA>250 ) nA = 250;
  if( nB>250 ) nB = 250;
  avg = (nA+nB)/2;
  if( avg==0 ) return 0;
  nMin = nA;
  if( nB<nMin ) nMin = nB;
  if( nMin==0 ) return 68;
  for(nPrefix=0; nPrefix<nMin && zA[nPrefix]==zB[nPrefix]; nPrefix++){}
  best = 0;
  if( nPrefix>5 && nPrefix>nMin/2 ){
    best = nPrefix*3/2;
    if( best>=avg - 2 ) best = avg - 2;
  }
  if( nA==nB && memcmp(zA, zB, nA)==0 ) return 0;
  memset(aFirst, 0xff, sizeof(aFirst));
  zA--; zB--;   /* Make both zA[] and zB[] 1-indexed */
  for(i=nB; i>0; i--){
    c = (unsigned char)zB[i];
    aNext[i] = aFirst[c];
    aFirst[c] = i;
  }
  for(i=1; i<=nA-best; i++){
    c = (unsigned char)zA[i];
    for(j=aFirst[c]; j<nB-best && memcmp(&zA[i],&zB[j],best)==0; j = aNext[j]){
      int limit = minInt(nA-i, nB-j);
      for(k=best; k<=limit && zA[k+i]==zB[k+j]; k++){}
      if( k>best ) best = k;
    }
  }
  score = (best>=avg) ? 0 : (avg - best)*100/avg;

#if 0
  fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n",
  nA, zA+1, nB, zB+1, best, avg, score);
#endif

  /* Return the result */
  return score;
}

/*
** There is a change block in which nLeft lines of text on the left are
** converted into nRight lines of text on the right.  This routine computes
** how the lines on the left line up with the lines on the right.
**
** The return value is a buffer of unsigned characters, obtained from
** fossil_malloc().  (The caller needs to free the return value using
** fossil_free().)  Entries in the returned array have values as follows:
**
**    1.  Delete the next line of pLeft.
**    2.  Insert the next line of pRight.
**    3.  The next line of pLeft changes into the next line of pRight.
**    4.  Delete one line from pLeft and add one line to pRight.
**
** The length of the returned array will be at most nLeft+nRight bytes.
** If the first bytes is 4,  that means we could not compute reasonable
** alignment between the two blocks.
**
** Algorithm:  Wagner's minimum edit-distance algorithm, modified by
** adding a cost to each match based on how well the two rows match
** each other.  Insertion and deletion costs are 50.  Match costs
** are between 0 and 100 where 0 is a perfect match 100 is a complete
** mismatch.
*/
static int diffBlockAlignment(
  const fsl_dline *aLeft, int nLeft,     /* Text on the left */
  const fsl_dline *aRight, int nRight,   /* Text on the right */
  fsl_diff_opt *pCfg,                  /* Configuration options */
  unsigned char **pResult, /* Raw result */
  unsigned *pNResult               /* OUTPUT: length of result */
){
  int i, j, k;                 /* Loop counters */
  int *a = 0;                  /* One row of the Wagner matrix */
  int *pToFree = 0;            /* Space that needs to be freed */
  unsigned char *aM = 0;       /* Wagner result matrix */
  int nMatch, iMatch;          /* Number of matching lines and match score */
  int aBuf[100];               /* Stack space for a[] if nRight not to big */
  int rc = 0;

  if( nLeft==0 ){
    aM = fsl_malloc( nRight + 2 );
    if(!aM) return FSL_RC_OOM;
    memset(aM, 2, nRight);
    *pNResult = nRight;
    *pResult = aM;
    return 0;
  }
  if( nRight==0 ){
    aM = fsl_malloc( nLeft + 2 );
    if(!aM) return FSL_RC_OOM;
    memset(aM, 1, nLeft);
    *pNResult = nLeft;
    *pResult = aM;
    return 0;
  }

  /* For large alignments, use a divide and conquer algorithm that is
  ** O(NlogN).  The result is not as precise, but this whole thing is an
  ** approximation anyhow, and the faster response time is an acceptable
  ** trade-off for reduced precision.
  */
  if( nLeft*nRight>DIFF_ALIGN_MX
      && (pCfg->diffFlags & FSL_DIFF2_SLOW_SBS)==0 ){
    const fsl_dline *aSmall;   /* The smaller of aLeft and aRight */
    const fsl_dline *aBig;     /* The larger of aLeft and aRight */
    int nSmall, nBig;      /* Size of aSmall and aBig.  nSmall<=nBig */
    int iDivSmall, iDivBig;  /* Divider point for aSmall and aBig */
    int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */
    unsigned char *a1 = 0, *a2 = 0;  /* Results of the alignments on two halves */
    unsigned int n1, n2;     /* Number of entries in a1 and a2 */
    int score, bestScore;    /* Score and best score seen so far */
    if( nLeft>nRight ){
      aSmall = aRight;
      nSmall = nRight;
      aBig = aLeft;
      nBig = nLeft;
    }else{
      aSmall = aLeft;
      nSmall = nLeft;
      aBig = aRight;
      nBig = nRight;
    }
    iDivBig = nBig/2;
    iDivSmall = nSmall/2;
    bestScore = 10000;
    for(i=0; i<nSmall; i++){
      score = match_dline2(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)*2;
      if( score<bestScore ){
        bestScore = score;
        iDivSmall = i;
      }
    }
    if( aSmall==aRight ){
      iDivRight = iDivSmall;
      iDivLeft = iDivBig;
    }else{
      iDivRight = iDivBig;
      iDivLeft = iDivSmall;
    }
    rc = diffBlockAlignment(aLeft,iDivLeft,aRight, iDivRight,
                            pCfg, &a1, &n1);
    if(!rc){
      rc = diffBlockAlignment(aLeft+iDivLeft, nLeft-iDivLeft,
                              aRight+iDivRight, nRight-iDivRight,
                              pCfg, &a2, &n2);
    }
    if(rc) goto bail;
    void * a1Re = fsl_realloc(a1, n1+n2 );
    if(!a1Re) goto bail;
    a1 = (unsigned char *)a1Re;
    memcpy(a1+n1,a2,n2);
    fsl_free(a2);
    a2 = 0;
    *pNResult = n1+n2;
    *pResult = a1;
    return 0;
    bail:
    assert(0!=rc);
    fsl_free( a1 );
    fsl_free( a2 );
    goto end;
  }

  /* If we reach this point, we will be doing an O(N*N) Wagner minimum
  ** edit distance to compute the alignment.
  */
  if( nRight < (int)(sizeof(aBuf)/sizeof(aBuf[0]))-1 ){
    pToFree = 0;
    a = aBuf;
  }else{
    a = pToFree = fsl_malloc( sizeof(a[0])*(nRight+1) );
    if(!a){
      rc = FSL_RC_OOM;
      goto end;
    }
  }
  aM = fsl_malloc( (nLeft+1)*(nRight+1) );
  if(!aM){
    rc = FSL_RC_OOM;
    goto end;
  }

  /* Compute the best alignment */
  for(i=0; i<=nRight; i++){
    aM[i] = 2;
    a[i] = i*50;
  }
  aM[0] = 0;
  for(j=1; j<=nLeft; j++){
    int p = a[0];
    a[0] = p+50;
    aM[j*(nRight+1)] = 1;
    for(i=1; i<=nRight; i++){
      int m = a[i-1]+50;
      int d = 2;
      if( m>a[i]+50 ){
        m = a[i]+50;
        d = 1;
      }
      if( m>p ){
        int const score =
          match_dline2(&aLeft[j-1], &aRight[i-1]);
        if( (score<=90 || (i<j+1 && i>j-1)) && m>p+score ){
          m = p+score;
          d = 3 | score*4;
        }
      }
      p = a[i];
      a[i] = m;
      aM[j*(nRight+1)+i] = d;
    }
  }

  /* Compute the lowest-cost path back through the matrix */
  i = nRight;
  j = nLeft;
  k = (nRight+1)*(nLeft+1)-1;
  nMatch = iMatch = 0;
  while( i+j>0 ){
    unsigned char c = aM[k];
    if( c>=3 ){
      assert( i>0 && j>0 );
      i--;
      j--;
      nMatch++;
      iMatch += (c>>2);
      aM[k] = 3;
    }else if( c==2 ){
      assert( i>0 );
      i--;
    }else{
      assert( j>0 );
      j--;
    }
    k--;
    aM[k] = aM[j*(nRight+1)+i];
  }
  k++;
  i = (nRight+1)*(nLeft+1) - k;
  memmove(aM, &aM[k], i);
  *pNResult = i;
  *pResult = aM;

  end:
  fsl_free(pToFree);
  return rc;
}


/*
** Format a diff using a fsl_diff_builder object
*/
static int fdb__format(
  fsl_diff_cx * const cx,
  fsl_diff_builder * const pBuilder
){
  const fsl_dline *A;        /* Left side of the diff */
  const fsl_dline *B;        /* Right side of the diff */
  fsl_diff_opt * const pCfg = pBuilder->cfg;
  const int *R;          /* Array of COPY/DELETE/INSERT triples */
  unsigned int a = 0;    /* Index of next line in A[] */
  unsigned int b = 0;    /* Index of next line in B[] */
  unsigned int r;        /* Index into R[] */
  unsigned int nr;       /* Number of COPY/DELETE/INSERT triples to process */
  unsigned int mxr;      /* Maximum value for r */
  unsigned int na, nb;   /* Number of lines shown from A and B */
  unsigned int i, j;     /* Loop counters */
  unsigned int m, ma, mb;/* Number of lines to output */
  signed int skip = 0;   /* Number of lines to skip */
  unsigned int nContext; /* Lines of context above and below each change */
  int rc = 0;

#define RC if(rc) goto end
  nContext = diff_opt_context_lines(pCfg);
  A = cx->aFrom;
  B = cx->aTo;
  R = cx->aEdit;
  mxr = cx->nEdit;
  //MARKER(("nContext=%u, nEdit = %d, mxr=%u\n", nContext, cx->nEdit, mxr));
  while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }

  ++pCfg->fileCount;
  if(pBuilder->start){
    rc = pBuilder->start(pBuilder);
    RC;
  }

  for(r=0; r<mxr; r += 3*nr){
    /* Figure out how many triples to show in a single block */
    for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<(int)nContext*2; nr++){}

#if 0
    /* MISSING: this "should" be replaced by a stateful predicate
       function, probably in the fsl_diff_opt class. */
    /* If there is a regex, skip this block (generate no diff output)
    ** if the regex matches or does not match both insert and delete.
    ** Only display the block if one side matches but the other side does
    ** not.
    */
    if( pCfg->pRe ){
      int hideBlock = 1;
      int xa = a, xb = b;
      for(i=0; hideBlock && i<nr; i++){
        int c1, c2;
        xa += R[r+i*3];
        xb += R[r+i*3];
        c1 = re_dline_match(pCfg->pRe, &A[xa], R[r+i*3+1]);
        c2 = re_dline_match(pCfg->pRe, &B[xb], R[r+i*3+2]);
        hideBlock = c1==c2;
        xa += R[r+i*3+1];
        xb += R[r+i*3+2];
      }
      if( hideBlock ){
        a = xa;
        b = xb;
        continue;
      }
    }
#endif

    /* Figure out how many lines of A and B are to be displayed
    ** for this change block.
    */
    if( R[r]>(int)nContext ){
      na = nb = nContext;
      skip = R[r] - nContext;
    }else{
      na = nb = R[r];
      skip = 0;
    }
    for(i=0; i<nr; i++){
      na += R[r+i*3+1];
      nb += R[r+i*3+2];
    }
    if( R[r+nr*3]>(int)nContext ){
      na += nContext;
      nb += nContext;
    }else{
      na += R[r+nr*3];
      nb += R[r+nr*3];
    }
    for(i=1; i<nr; i++){
      na += R[r+i*3];
      nb += R[r+i*3];
    }


    //MARKER(("Chunk header... a=%u, b=%u, na=%u, nb=%u, skip=%d\n", a, b, na, nb, skip));
    if(pBuilder->chunkHeader){
      rc = pBuilder->chunkHeader(pBuilder,
                                 (uint32_t)(na ? a+skip+1 : a+skip),
                                 (uint32_t)na,
                                 (uint32_t)(nb ? b+skip+1 : b+skip),
                                 (uint32_t)nb);
      RC;
    }

    /* Show the initial common area */
    a += skip;
    b += skip;
    m = R[r] - skip;
    if( r ) skip -= nContext;

    //MARKER(("Show the initial common... a=%u, b=%u, m=%u, r=%u, skip=%d\n", a, b, m, r, skip));
    if( skip>0 ){
      if( NULL==pBuilder->chunkHeader && skip<(int)nContext ){
        /* 2021-09-27: BUG: this is incompatible with unified diff
           format. The generated header lines say we're skipping X
           lines but we then end up including lines which that header
           says to skip. As a workaround, we'll only run this when
           pBuilder->chunkHeader is NULL, noting that fossil's
           diff builder interface does not have that method.

           Without this block, our "utxt" diff builder can mimic
           fossil's non-diff builder unified diff format, except that
           we add Index lines (feature or bug?). With this block,
           the header values output above are wrong.
        */
        /* If the amount to skip is less that the context band, then
        ** go ahead and show the skip band as it is not worth eliding */
        //MARKER(("skip %d < nContext %d\n", skip, nContext));
        /* from fossil(1) from formatDiff() */
        for(j=0; 0==rc && j<(unsigned)skip; j++){
          //MARKER(("(A) COMMON\n"));
          rc = pBuilder->common(pBuilder, &A[a+j-skip]);
        }
      }else{
        rc = pBuilder->skip(pBuilder, skip, 0);
      }
      RC;
    }
    for(j=0; 0==rc && j<m; j++){
      //MARKER(("(B) COMMON\n"));
      rc = pBuilder->common(pBuilder, &A[a+j]);
    }
    RC;
    a += m;
    b += m;
    //MARKER(("Show the differences... a=%d, b=%d, m=%d\n", a, b, m));

    /* Show the differences */
    for(i=0; i<nr; i++){
      unsigned int nAlign;
      unsigned char *alignment = 0;
      ma = R[r+i*3+1];   /* Lines on left but not on right */
      mb = R[r+i*3+2];   /* Lines on right but not on left */

      /* Try merging the current block with subsequent blocks, if the
      ** subsequent blocks are nearby and their result isn't too big.
      */
      while( i<nr-1 && smallGap2(&R[r+i*3],ma,mb) ){
        i++;
        m = R[r+i*3];
        ma += R[r+i*3+1] + m;
        mb += R[r+i*3+2] + m;
      }

      /* Try to find an alignment for the lines within this one block */
      rc = diffBlockAlignment(&A[a], ma, &B[b], mb, pCfg,
                              &alignment, &nAlign);
      RC;
      for(j=0; ma+mb>0; j++){
        assert( j<nAlign );
        switch( alignment[j] ){
          case 1: {
            /* Delete one line from the left */
            rc = pBuilder->deletion(pBuilder, &A[a]);
            if(rc) goto bail;
            ma--;
            a++;
            break;
          }
          case 2: {
            /* Insert one line on the right */
            rc = pBuilder->insertion(pBuilder, &B[b]);
            if(rc) goto bail;
            assert( mb>0 );
            mb--;
            b++;
            break;
          }
          case 3: {
            /* The left line is changed into the right line */
            if( cx->cmpLine(&A[a], &B[b])==0 ){
              //MARKER(("C common\n"));
              rc = pBuilder->common(pBuilder, &A[a]);
            }else{
              rc = pBuilder->edit(pBuilder, &A[a], &B[b]);
            }
            if(rc) goto bail;
            assert( ma>0 && mb>0 );
            ma--;
            mb--;
            a++;
            b++;
            break;
          }
          case 4: {
            /* Delete from left then separately insert on the right */
            rc = pBuilder->replacement(pBuilder, &A[a], &B[b]);
            if(rc) goto bail;
            ma--;
            a++;
            mb--;
            b++;
            break;
          }
        }
      }
      assert( nAlign==j );
      fsl_free(alignment);
      if( i<nr-1 ){
        m = R[r+i*3+3];
        for(j=0; 0==rc && j<m; j++){
          //MARKER(("D common\n"));
          rc = pBuilder->common(pBuilder, &A[a+j]);
        }
        RC;
        b += m;
        a += m;
      }
      continue;
      bail:
      assert(rc);
      fsl_free(alignment);
      goto end;
    }

    /* Show the final common area */
    assert( nr==i );
    m = R[r+nr*3];
    if( m>nContext ) m = nContext;
    for(j=0; 0==rc && j<m && j<nContext; j++){
      //MARKER(("E common\n"));
      rc = pBuilder->common(pBuilder, &A[a+j]);
    }
    RC;
  }
  if( R[r]>(int)nContext ){
    rc = pBuilder->skip(pBuilder, R[r] - nContext, true);
  }
  end:
#undef RC
  if(0==rc && pBuilder->finish){
    pBuilder->finish(pBuilder);
  }
  return rc;
}

/* MISSING(?) fossil(1) converts the diff inputs into utf8 with no
   BOM. Whether we really want to do that here or rely on the caller
   to is up for debate. If we do it here, we have to make the inputs
   non-const, which seems "wrong" for a library API. */
#define blob_to_utf8_no_bom(A,B) (void)0

/**
   Performs a diff of version 1 (pA) and version 2 (pB). ONE of
   pBuilder or outRaw must be non-NULL. If pBuilder is not NULL, all
   output for the diff is emitted via pBuilder. If outRaw is not NULL
   then on success *outRaw is set to the array of diff triples,
   transfering ownership to the caller, who must eventually fsl_free()
   it. On error, *outRaw is not modified but pBuilder may have emitted
   partial output. That is not knowable for the general
   case. Ownership of pBuilder is not changed. If pBuilder is not NULL
   then pBuilder->cfg must be non-NULL.
*/
static int fsl_diff2_text_impl(fsl_buffer const *pA,
                               fsl_buffer const *pB,
                               fsl_diff_builder * const pBuilder,
                               fsl_diff_opt const * const cfg,
                               int ** outRaw){
  int rc = 0;
  fsl_diff_cx c = fsl_diff_cx_empty;
  bool ignoreWs = false;
  assert(cfg);
  if(!pA || !pB || (pBuilder && outRaw)
     || (!cfg && !outRaw)) return FSL_RC_MISUSE;

  blob_to_utf8_no_bom(pA, 0);
  blob_to_utf8_no_bom(pB, 0);

  if( cfg->diffFlags & FSL_DIFF2_INVERT ){
    fsl_buffer const *pTemp = pA;
    pA = pB;
    pB = pTemp;
  }

  ignoreWs = (cfg->diffFlags & FSL_DIFF2_IGNORE_ALLWS)!=0;
  if(FSL_DIFF2_IGNORE_ALLWS==(cfg->diffFlags & FSL_DIFF2_IGNORE_ALLWS)){
    c.cmpLine = fsl_dline_cmp_ignore_ws;
  }else{
    c.cmpLine = fsl_dline_cmp;
  }
 
  rc = fsl_break_into_dlines(fsl_buffer_cstr(pA), (fsl_int_t)pA->used,
                             (uint32_t*)&c.nFrom, &c.aFrom, cfg->diffFlags);
  if(rc) goto end;
  rc = fsl_break_into_dlines(fsl_buffer_cstr(pB), (fsl_int_t)pB->used,
                             (uint32_t*)&c.nTo, &c.aTo, cfg->diffFlags);
  if(rc) goto end;
  if( c.aFrom==0 || c.aTo==0 ){
    /* Binary data */
    rc = FSL_RC_DIFF_BINARY;
    goto end;
  }

  /* Compute the difference */
  rc = fsl__diff_all(&c);
  if(rc) goto end;

  if( ignoreWs && c.nEdit==6 && c.aEdit[1]==0 && c.aEdit[2]==0 ){
    rc = FSL_RC_DIFF_WS_ONLY;
    goto end;
  }
  if( (cfg->diffFlags & FSL_DIFF2_NOTTOOBIG)!=0 ){
    int i, m, n;
    int const * const a = c.aEdit;
    int const mx = c.nEdit;
    for(i=m=n=0; i<mx; i+=3){ m += a[i]; n += a[i+1]+a[i+2]; }
    if( !n || n>10000 ){
      rc = FSL_RC_RANGE;
      /* diff_errmsg(pOut, DIFF_TOO_MANY_CHANGES, diffFlags); */
      goto end;
    }
  }
  //fsl__dump_triples(&c, __FILE__, __LINE__);
  if( (cfg->diffFlags & FSL_DIFF2_NOOPT)==0 ){
    fsl__diff_optimize(&c);
  }
  //fsl__dump_triples(&c, __FILE__, __LINE__);

  /**
     Reference:

     https://fossil-scm.org/home/file?ci=cae7036bb7f07c1b&name=src/diff.c&ln=2749-2804

     Noting that:

     - That function's return value is this one's *outRaw

     - DIFF_NUMSTAT flag is not implemented. For that matter,
     flags which result in output going anywhere except for
     pBuilder->out are not implemented here, e.g. DIFF_RAW.

     That last point makes this impl tiny compared to the original!
  */

  if(pBuilder){
    rc = fdb__format(&c, pBuilder);
  }
  end:
  if(0==rc && outRaw){
    *outRaw = c.aEdit;
    c.aEdit = 0;
  }
  fsl__diff_cx_clean(&c);
  return rc;
}

int fsl_diff_v2(fsl_buffer const * pv1,
                fsl_buffer const * pv2,
                fsl_diff_builder * const pBuilder){
  return fsl_diff2_text_impl(pv1, pv2, pBuilder, pBuilder->cfg, NULL);
}

int fsl_diff_v2_raw(fsl_buffer const * pv1,
                    fsl_buffer const * pv2,
                    fsl_diff_opt const * const cfg,
                    int **outRaw ){
  return fsl_diff2_text_impl(pv1, pv2, NULL, cfg, outRaw);
}


/**
   Allocator for fsl_diff_builder instances. If extra is >0 then that
   much extra space is allocated as part of the same block and the
   pimpl member of the returned object is pointed to that space.
*/
static fsl_diff_builder * fsl__diff_builder_alloc(fsl_size_t extra){
  fsl_diff_builder * rc =
    (fsl_diff_builder*)fsl_malloc(sizeof(fsl_diff_builder) + extra);
  if(rc){
    *rc = fsl_diff_builder_empty;
    if(extra){
      rc->pimpl = ((unsigned char *)rc)+sizeof(fsl_diff_builder);
    }
  }
  return rc;
}

static int fdb__out(fsl_diff_builder *const b,
                    char const *z, fsl_size_t n){
  return b->cfg->out(b->cfg->outState, z, n);
}
static int fdb__outf(fsl_diff_builder * const b,
                     char const *fmt, ...){
  int rc = 0;
  va_list va;
  assert(b->cfg->out);
  va_start(va,fmt);
  rc = fsl_appendfv(b->cfg->out, b->cfg->outState, fmt, va);
  va_end(va);
  return rc;
}


static int fdb__debug_start(fsl_diff_builder * const b){
  int rc = fdb__outf(b, "DEBUG builder starting up.\n");
  if(0==rc && b->cfg->nameLHS){
    rc = fdb__outf(b,"LHS: %s\n", b->cfg->nameLHS);
  }
  if(0==rc && b->cfg->nameRHS){
    rc = fdb__outf(b,"RHS: %s\n", b->cfg->nameRHS);
  }
  if(0==rc && b->cfg->hashLHS){
    rc = fdb__outf(b,"LHS hash: %s\n", b->cfg->hashLHS);
  }
  if(0==rc && b->cfg->hashRHS){
    rc = fdb__outf(b,"RHS hash: %s\n", b->cfg->hashRHS);
  }
  return rc;
}


static int fdb__debug_chunkHeader(fsl_diff_builder* const b,
                                  uint32_t lnnoLHS, uint32_t linesLHS,
                                  uint32_t lnnoRHS, uint32_t linesRHS ){
#if 1
  return fdb__outf(b, "@@ -%" PRIu32 ",%" PRIu32
                   " +%" PRIu32 ",%" PRIu32 " @@\n",
                   lnnoLHS, linesLHS, lnnoRHS, linesRHS);
#else
  return 0;
#endif
}

static int fdb__debug_skip(fsl_diff_builder * const p, uint32_t n, bool isFinal){
  const int rc = fdb__outf(p, "SKIP %u (%u..%u left and %u..%u right)%s\n",
                           n, p->lnLHS+1, p->lnLHS+n, p->lnRHS+1, p->lnRHS+n,
                           isFinal ? " FINAL" : "");
  p->lnLHS += n;
  p->lnRHS += n;
  return rc;
}
static int fdb__debug_common(fsl_diff_builder * const p, fsl_dline const * pLine){
  ++p->lnLHS;
  ++p->lnRHS;
  return fdb__outf(p, "COMMON  %8u %8u %.*s\n",
                   p->lnLHS, p->lnRHS, (int)pLine->n, pLine->z);
}
static int fdb__debug_insertion(fsl_diff_builder * const p, fsl_dline const * pLine){
  p->lnRHS++;
  return fdb__outf(p, "INSERT           %8u %.*s\n",
                   p->lnRHS, (int)pLine->n, pLine->z);
}
static int fdb__debug_deletion(fsl_diff_builder * const p, fsl_dline const * pLine){
  p->lnLHS++;
  return fdb__outf(p, "DELETE  %8u          %.*s\n",
                   p->lnLHS, (int)pLine->n, pLine->z);
}
static int fdb__debug_replacement(fsl_diff_builder * const p,
                              fsl_dline const * lineLhs,
                              fsl_dline const * lineRhs) {
  int rc;
  p->lnLHS++;
  p->lnRHS++;
  rc = fdb__outf(p, "REPLACE %8u          %.*s\n",
      p->lnLHS, (int)lineLhs->n, lineLhs->z);
  if(!rc){
    rc = fdb__outf(p, "            %8u %.*s\n",
                   p->lnRHS, (int)lineRhs->n, lineRhs->z);
  }
  return rc;
}
                 
static int fdb__debug_edit(fsl_diff_builder * const b,
                           fsl_dline const * pX,
                           fsl_dline const * pY){
  int rc = 0;
  int i, j;
  int x;
  fsl_dline_change chng = fsl_dline_change_empty;
#define RC if(rc) goto end
  b->lnLHS++;
  b->lnRHS++;
  rc = fdb__outf(b, "EDIT    %8u          %.*s\n",
                 b->lnLHS, (int)pX->n, pX->z);
  RC;
  fsl_dline_change_spans(pX, pY, &chng);
  for(i=x=0; i<chng.n; i++){
    int ofst = chng.a[i].iStart1;
    int len = chng.a[i].iLen1;
    if( len ){
      char c = '0' + i;
      if( x==0 ){
        rc = fdb__outf(b, "%*s", 26, "");
        RC;
      }
      while( ofst > x ){
        if( (pX->z[x]&0xc0)!=0x80 ){
          rc = fdb__out(b, " ", 1);
          RC;
        }
        x++;
      }
      for(j=0; j<len; j++, x++){
        if( (pX->z[x]&0xc0)!=0x80 ){
          rc = fdb__out(b, &c, 1);
          RC;
        }
      }
    }
  }
  if( x ){
    rc = fdb__out(b, "\n", 1);
    RC;
  }
  rc = fdb__outf(b, "                 %8u %.*s\n",
                 b->lnRHS, (int)pY->n, pY->z);
  RC;
  for(i=x=0; i<chng.n; i++){
    int ofst = chng.a[i].iStart2;
    int len = chng.a[i].iLen2;
    if( len ){
      char c = '0' + i;
      if( x==0 ){
        rc = fdb__outf(b, "%*s", 26, "");
        RC;
      }
      while( ofst > x ){
        if( (pY->z[x]&0xc0)!=0x80 ){
          rc = fdb__out(b, " ", 1);
          RC;
        }
        x++;
      }
      for(j=0; j<len; j++, x++){
        if( (pY->z[x]&0xc0)!=0x80 ){
          rc = fdb__out(b, &c, 1);
          RC;
        }
      }
    }
  }
  if( x ){
    rc = fdb__out(b, "\n", 1);
  }
  end:
#undef RC
  return rc;
}

static int fdb__debug_finish(fsl_diff_builder * const b){
  return fdb__outf(b, "END with %u lines left and %u lines right\n",
                   b->lnLHS, b->lnRHS);
}

static void fdb__generic_finalize(fsl_diff_builder * const b){
  fsl_free(b);
}

static fsl_diff_builder * fsl__diff_builder_debug(void){
  fsl_diff_builder * rc = fsl__diff_builder_alloc(0);
  if(rc){
    rc->chunkHeader = fdb__debug_chunkHeader;
    rc->start = fdb__debug_start;
    rc->skip = fdb__debug_skip;
    rc->common = fdb__debug_common;
    rc->insertion = fdb__debug_insertion;
    rc->deletion = fdb__debug_deletion;
    rc->replacement = fdb__debug_replacement;
    rc->edit = fdb__debug_edit;
    rc->finish = fdb__debug_finish;
    rc->finalize = fdb__generic_finalize;
    assert(!rc->pimpl);
    assert(0==rc->implFlags);
    assert(0==rc->lnLHS);
    assert(0==rc->lnRHS);
    assert(NULL==rc->cfg);
  }
  return rc;
}

/******************** json1 diff builder ********************/
/* Description taken verbatim from fossil(1): */
/*
** This formatter generates a JSON array that describes the difference.
**
** The Json array consists of integer opcodes with each opcode followed
** by zero or more arguments:
**
**   Syntax        Mnemonic    Description
**   -----------   --------    --------------------------
**   0             END         This is the end of the diff
**   1  INTEGER    SKIP        Skip N lines from both files
**   2  STRING     COMMON      The line show by STRING is in both files
**   3  STRING     INSERT      The line STRING is in only the right file
**   4  STRING     DELETE      The STRING line is in only the left file
**   5  SUBARRAY   EDIT        One line is different on left and right.
**
** The SUBARRAY is an array of 3*N+1 strings with N>=0.  The triples
** represent common-text, left-text, and right-text.  The last string
** in SUBARRAY is the common-suffix.  Any string can be empty if it does
** not apply.
*/

static int fdb__outj(fsl_diff_builder * const b,
                     char const *zJson, int n){
  return n<0
    ? fdb__outf(b, "%!j", zJson)
    : fdb__outf(b, "%!.*j", n, zJson);
}

static int fdb__json1_start(fsl_diff_builder * const b){
  int rc = fdb__outf(b, "{\"hashLHS\": %!j, \"hashRHS\": %!j, ",
                     b->cfg->hashLHS, b->cfg->hashRHS);
  if(0==rc && b->cfg->nameLHS){
    rc = fdb__outf(b, "\"nameLHS\": %!j, ", b->cfg->nameLHS);
  }
  if(0==rc && b->cfg->nameRHS){
    rc = fdb__outf(b, "\"nameRHS\": %!j, ", b->cfg->nameRHS);
  }
  if(0==rc){
    rc = fdb__out(b, "\"diff\":[", 8);
  }
  return rc;
}

static int fdb__json1_skip(fsl_diff_builder * const b, uint32_t n, bool isFinal){
  return fdb__outf(b, "1,%" PRIu32 ",\n", n);
}
static int fdb__json1_common(fsl_diff_builder * const b, fsl_dline const * pLine){
  int rc = fdb__out(b, "2,",2);
  if(!rc) {
    rc = fdb__outj(b, pLine->z, (int)pLine->n);
    if(!rc) rc = fdb__out(b, ",\n",2);
  }
  return rc;
}
static int fdb__json1_insertion(fsl_diff_builder * const b, fsl_dline const * pLine){
  int rc = fdb__out(b, "3,",2);
  if(!rc){
    rc = fdb__outj(b, pLine->z, (int)pLine->n);
    if(!rc) rc = fdb__out(b, ",\n",2);
  }
  return rc;
}
static int fdb__json1_deletion(fsl_diff_builder * const b, fsl_dline const * pLine){
  int rc = fdb__out(b, "4,",2);
  if(!rc){
    rc = fdb__outj(b, pLine->z, (int)pLine->n);
    if(!rc) rc = fdb__out(b, ",\n",2);
  }
  return rc;
}
static int fdb__json1_replacement(fsl_diff_builder * const b,
                              fsl_dline const * lineLhs,
                              fsl_dline const * lineRhs) {
  int rc = fdb__out(b, "5,[\"\",",6);
  if(!rc) rc = fdb__outf(b,"%!.*j", (int)lineLhs->n, lineLhs->z);
  if(!rc) rc = fdb__out(b, ",", 1);
  if(!rc) rc = fdb__outf(b,"%!.*j", (int)lineRhs->n, lineRhs->z);
  if(!rc) rc = fdb__out(b, ",\"\"],\n",6);
  return rc;
}
                 
static int fdb__json1_edit(fsl_diff_builder * const b,
                           fsl_dline const * pX,
                           fsl_dline const * pY){
  int rc = 0;
  int i,x;
  fsl_dline_change chng = fsl_dline_change_empty;

#define RC if(rc) goto end
  rc = fdb__out(b, "5,[", 3); RC;
  fsl_dline_change_spans(pX, pY, &chng);
  for(i=x=0; i<(int)chng.n; i++){
    rc = fdb__outj(b, pX->z + x, (int)chng.a[i].iStart1 - x); RC;
    x = chng.a[i].iStart1;
    rc = fdb__out(b, ",", 1); RC;
    rc = fdb__outj(b, pX->z + x, (int)chng.a[i].iLen1); RC;
    x += chng.a[i].iLen1;
    rc = fdb__out(b, ",", 1); RC;
    rc = fdb__outj(b, pY->z + chng.a[i].iStart2,
                   (int)chng.a[i].iLen2); RC;
  }
  rc = fdb__out(b, ",", 1); RC;
  rc = fdb__outj(b, pX->z + x, (int)(pX->n - x)); RC;
  rc = fdb__out(b, "],\n",3); RC;
  end:
  return rc;
#undef RC
}

static int fdb__json1_finish(fsl_diff_builder * const b){
  return fdb__out(b, "0]}", 3);
}

static fsl_diff_builder * fsl__diff_builder_json1(void){
  fsl_diff_builder * rc = fsl__diff_builder_alloc(0);
  if(rc){
    rc->chunkHeader = NULL;
    rc->start = fdb__json1_start;
    rc->skip = fdb__json1_skip;
    rc->common = fdb__json1_common;
    rc->insertion = fdb__json1_insertion;
    rc->deletion = fdb__json1_deletion;
    rc->replacement = fdb__json1_replacement;
    rc->edit = fdb__json1_edit;
    rc->finish = fdb__json1_finish;
    rc->finalize = fdb__generic_finalize;
    assert(!rc->pimpl);
    assert(0==rc->implFlags);
    assert(0==rc->lnLHS);
    assert(0==rc->lnRHS);
    assert(NULL==rc->cfg);
  }
  return rc;
}

#if 0
/*
  If we do unified diff line numbers, or HTML-mode unified diffs,
  we'll probably want something like this for accumulating the various
  columns.
*/
struct UnifiedTxt {
  fsl_buffer cols[5];
};
typedef struct UnifiedTxt UnifiedTxt;
static const UnifiedTxt_empty = {
{fsl_buffer_empty_m, fsl_buffer_empty_m,
 fsl_buffer_empty_m, fsl_buffer_empty_m,
 fsl_buffer_empty_m}
};
#define USTATE UnifiedTxt * const ust = (UnifiedTxt *)b->pimpl
#endif

static int fdb__utxt_start(fsl_diff_builder * const b){
  int rc = 0;
  if(0==(FSL_DIFF2_NOINDEX & b->cfg->diffFlags)){
    rc = fdb__outf(b,"Index: %s\n%.66c\n",
                   b->cfg->nameLHS/*RHS?*/, '=');
  }
  if(0==rc){
    rc = fdb__outf(b, "--- %s\n+++ %s\n",
                   b->cfg->nameLHS, b->cfg->nameRHS);
  }
  return rc;
}

static int fdb__utxt_chunkHeader(fsl_diff_builder* const b,
                                 uint32_t lnnoLHS, uint32_t linesLHS,
                                 uint32_t lnnoRHS, uint32_t linesRHS ){
  return fdb__outf(b, "@@ -%" PRIu32 ",%" PRIu32
                   " +%" PRIu32 ",%" PRIu32 " @@\n",
                   lnnoLHS, linesLHS, lnnoRHS, linesRHS);
}


static int fdb__utxt_skip(fsl_diff_builder * const b, uint32_t n, bool isFinal){
  //MARKER(("SKIP\n"));
  b->lnLHS += n;
  b->lnRHS += n;
  return 0;
}

/** Outputs line numbers to b->cfg->out. */
static int fdb__utxt_lineno(fsl_diff_builder * const b, uint32_t lnL, uint32_t lnR){
  int rc = 0;
  if(FSL_DIFF2_LINE_NUMBERS & b->cfg->diffFlags){
    rc = lnL
      ? fdb__outf(b, "%6" PRIu32 " ", lnL)
      : fdb__out(b, "       ", 7);
    if(0==rc){
      rc = lnR
      ? fdb__outf(b, "%6" PRIu32 " ", lnR)
      : fdb__out(b, "       ", 7);
    }
  }
  return rc;
}

static int fdb__utxt_common(fsl_diff_builder * const b, fsl_dline const * pLine){
  //MARKER(("COMMON\n"));
  ++b->lnLHS;
  ++b->lnRHS;
  const int rc = fdb__utxt_lineno(b, b->lnLHS, b->lnRHS);
  return rc ? rc : fdb__outf(b, " %.*s\n", (int)pLine->n, pLine->z);
}
static int fdb__utxt_insertion(fsl_diff_builder * const b, fsl_dline const * pLine){
  //MARKER(("INSERT\n"));
  ++b->lnRHS;
  const int rc = fdb__utxt_lineno(b, 0, b->lnRHS);
  return rc ? rc : fdb__outf(b, "+%.*s\n", (int)pLine->n, pLine->z);
}
static int fdb__utxt_deletion(fsl_diff_builder * const b, fsl_dline const * pLine){
  //MARKER(("DELETE\n"));
  ++b->lnLHS;
  const int rc = fdb__utxt_lineno(b, b->lnLHS, 0);
  return rc ? rc : fdb__outf(b, "-%.*s\n", (int)pLine->n, pLine->z);
}
static int fdb__utxt_replacement(fsl_diff_builder * const b,
                                 fsl_dline const * lineLhs,
                                 fsl_dline const * lineRhs) {
  //MARKER(("REPLACE\n"));
  int rc = b->deletion(b, lineLhs);
  if(0==rc) rc = b->insertion(b, lineRhs);
  return rc;
}
static int fdb__utxt_edit(fsl_diff_builder * const b,
                           fsl_dline const * lineLhs,
                           fsl_dline const * lineRhs){
  //MARKER(("EDIT\n"));
  int rc = b->deletion(b, lineLhs);
  if(0==rc) rc = b->insertion(b, lineRhs);
  return rc;
}

static void fdb__utxt_finalize(fsl_diff_builder * const b){
#if 0
  USTATE;
  for( int i = 0; i < sizeof(ust->cols)/sizeof(ust->cols[0]); ++i ){
    fsl_buffer_clear(&usr->cols[i]);
  }
  fsl_free(ust);
#endif
  fsl_free(b);
}
#undef USTATE

static fsl_diff_builder * fsl__diff_builder_utxt(void){
  fsl_diff_builder * rc = fsl__diff_builder_alloc(0);
  if(!rc) return NULL;
  /****
  UnifiedTxt * ust = fsl_malloc(sizeof(UnifiedTxt));
  if(!ust){
    fsl_free(rc);
    return NULL;
  }else{
    *usr = UnifiedTxt_empty;
  }
  *****/
  rc->chunkHeader = fdb__utxt_chunkHeader;
  rc->start = fdb__utxt_start;
  rc->skip = fdb__utxt_skip;
  rc->common = fdb__utxt_common;
  rc->insertion = fdb__utxt_insertion;
  rc->deletion = fdb__utxt_deletion;
  rc->replacement = fdb__utxt_replacement;
  rc->edit = fdb__utxt_edit;
  rc->finish = NULL;
  rc->finalize = fdb__utxt_finalize;
  return rc;
}

struct DiBuTcl {
  /** Buffer for TCL-format string conversion */
  fsl_buffer str;
};
typedef struct DiBuTcl DiBuTcl;
static const DiBuTcl DiBuTcl_empty = {fsl_buffer_empty_m};

#define BR_OPEN if(FSL_DIFF2_TCL_BRACES & b->cfg->diffFlags) \
    rc = fdb__out(b, "{", 1)
#define BR_CLOSE if(FSL_DIFF2_TCL_BRACES & b->cfg->diffFlags) \
    rc = fdb__out(b, "}", 1)

#define DTCL_BUFFER(B) &((DiBuTcl*)(B)->pimpl)->str
static int fdb__outtcl(fsl_diff_builder * const b,
                       char const *z, unsigned int n,
                       char chAppend ){
  int rc;
  fsl_buffer * const o = DTCL_BUFFER(b);
  fsl_buffer_reuse(o);
  rc = fsl_buffer_append_tcl_literal(o, z, n);
  if(0==rc) rc = fdb__out(b, (char const *)o->mem, o->used);
  if(chAppend && 0==rc) rc = fdb__out(b, &chAppend, 1);
  return rc;
}

static int fdb__tcl_start(fsl_diff_builder * const b){
  int rc = 0;
  fsl_buffer_reuse(DTCL_BUFFER(b));
  BR_OPEN;
  if(0==rc) rc = fdb__out(b, "\n", 1);
  if(0==rc && b->cfg->nameLHS){
    char const * zRHS =
      b->cfg->nameRHS ? b->cfg->nameRHS : b->cfg->nameLHS;
    BR_OPEN;
    if(0==rc) rc = fdb__out(b, "FILE ", 5);
    if(0==rc) rc = fdb__outtcl(b, b->cfg->nameLHS,
                               (unsigned)fsl_strlen(b->cfg->nameLHS), ' ');
    if(0==rc) rc = fdb__outtcl(b, zRHS,
                               (unsigned)fsl_strlen(zRHS), 0);
    if(0==rc) {BR_CLOSE;}
    if(0==rc) rc = fdb__out(b, "\n", 1);
  }
  return rc;
}

static int fdb__tcl_skip(fsl_diff_builder * const b, uint32_t n, bool isFinal){
  int rc = 0;
  BR_OPEN;
  if(0==rc) rc = fdb__outf(b, "SKIP %" PRIu32, n);
  if(0==rc) {BR_CLOSE;}
  if(0==rc) rc = fdb__outf(b, "\n", 1);
  return rc;
}

static int fdb__tcl_common(fsl_diff_builder * const b, fsl_dline const * pLine){
  int rc = 0;
  BR_OPEN;
  if(0==rc) rc = fdb__out(b, "COM  ", 5);
  if(0==rc) rc= fdb__outtcl(b, pLine->z, pLine->n, 0);
  if(0==rc) {BR_CLOSE;}
  if(0==rc) rc = fdb__outf(b, "\n", 1);
  return rc;
}
static int fdb__tcl_insertion(fsl_diff_builder * const b, fsl_dline const * pLine){
  int rc = 0;
  BR_OPEN;
  if(0==rc) rc = fdb__out(b, "INS  ", 5);
  if(0==rc) rc = fdb__outtcl(b, pLine->z, pLine->n, 0);
  if(0==rc) {BR_CLOSE;}
  if(0==rc) rc = fdb__outf(b, "\n", 1);
  return rc;
}
static int fdb__tcl_deletion(fsl_diff_builder * const b, fsl_dline const * pLine){
  int rc = 0;
  BR_OPEN;
  if(0==rc) rc = fdb__out(b, "DEL  ", 5);
  if(0==rc) rc = fdb__outtcl(b, pLine->z, pLine->n, 0);
  if(0==rc) {BR_CLOSE;}
  if(0==rc) rc = fdb__outf(b, "\n", 1);
  return rc;
}
static int fdb__tcl_replacement(fsl_diff_builder * const b,
                                fsl_dline const * lineLhs,
                                fsl_dline const * lineRhs) {
  int rc = 0;
  BR_OPEN;
  if(0==rc) rc = fdb__out(b, "EDIT \"\" ", 8);
  if(0==rc) rc = fdb__outtcl(b, lineLhs->z, lineLhs->n, ' ');
  if(0==rc) rc = fdb__outtcl(b, lineRhs->z, lineRhs->n, 0);
  if(0==rc) {BR_CLOSE;}
  if(0==rc) rc = fdb__outf(b, "\n", 1);
  return rc;
}
                 
static int fdb__tcl_edit(fsl_diff_builder * const b,
                         fsl_dline const * pX,
                         fsl_dline const * pY){
  int rc = 0;
  int i, x;
  fsl_dline_change chng = fsl_dline_change_empty;
#define RC if(rc) goto end
  BR_OPEN;
  rc = fdb__out(b, "EDIT", 4); RC;
  fsl_dline_change_spans(pX, pY, &chng);
  for(i=x=0; i<chng.n; i++){
    rc = fdb__out(b, " ", 1); RC;
    rc = fdb__outtcl(b, pX->z + x, chng.a[i].iStart1 - x, ' '); RC;
    x = chng.a[i].iStart1;
    rc = fdb__outtcl(b, pX->z + x, chng.a[i].iLen1, ' '); RC;
    x += chng.a[i].iLen1;
    rc = fdb__outtcl(b, pY->z + chng.a[i].iStart2,
                     chng.a[i].iLen2, 0); RC;
  }
  assert(0==rc);
  if( x < pX->n ){
    rc = fdb__out(b, " ", 1); RC;
    rc = fdb__outtcl(b, pX->z + x, pX->n - x, 0); RC;
  }
  BR_CLOSE; RC;
  rc = fdb__out(b, "\n", 1);
  end:
#undef RC
  return rc;
}

static int fdb__tcl_finish(fsl_diff_builder * const b){
  int rc = 0;
  BR_CLOSE;
  if(0==rc && FSL_DIFF2_TCL_BRACES & b->cfg->diffFlags){
    rc = fdb__out(b, "\n", 1);
  }
  return rc;
}
#undef BR_OPEN
#undef BR_CLOSE

static void fdb__tcl_finalize(fsl_diff_builder * const b){
  fsl_buffer_clear( &((DiBuTcl*)b->pimpl)->str );
  *b = fsl_diff_builder_empty;
  fsl_free(b);
}

static fsl_diff_builder * fsl__diff_builder_tcl(void){
  fsl_diff_builder * rc =
    fsl__diff_builder_alloc((fsl_size_t)sizeof(DiBuTcl));
  if(rc){
    rc->chunkHeader = NULL;
    rc->start = fdb__tcl_start;
    rc->skip = fdb__tcl_skip;
    rc->common = fdb__tcl_common;
    rc->insertion = fdb__tcl_insertion;
    rc->deletion = fdb__tcl_deletion;
    rc->replacement = fdb__tcl_replacement;
    rc->edit = fdb__tcl_edit;
    rc->finish = fdb__tcl_finish;
    rc->finalize = fdb__tcl_finalize;
    assert(0!=rc->pimpl);
    DiBuTcl * const dbt = (DiBuTcl*)rc->pimpl;
    *dbt = DiBuTcl_empty;
    if(fsl_buffer_reserve(&dbt->str, 120)){
      rc->finalize(rc);
      rc = 0;
    }
  }
  return rc;
}

int fsl_diff_builder_factory( fsl_diff_builder_e type,
                              fsl_diff_builder **pOut ){
  int rc = FSL_RC_TYPE;
  fsl_diff_builder * (*factory)(void) = NULL;
  switch(type){
    case FSL_DIFF_BUILDER_DEBUG:
      factory = fsl__diff_builder_debug;
      break;
    case FSL_DIFF_BUILDER_JSON1:
      factory = fsl__diff_builder_json1;
      break;
    case FSL_DIFF_BUILDER_UNIFIED_TEXT:
      factory = fsl__diff_builder_utxt;
      break;
    case FSL_DIFF_BUILDER_TCL:
      factory = fsl__diff_builder_tcl;
      break;
    case FSL_DIFF_BUILDER_SBS_TEXT:
      break;
  }
  if(NULL!=factory){
    *pOut = factory();
    rc = *pOut ? 0 : FSL_RC_OOM;
  }
  return rc;
}

#undef DIFF_ALIGN_MX
#undef DIFF_CANNOT_COMPUTE_BINARY
#undef DIFF_CANNOT_COMPUTE_SYMLINK
#undef DIFF_TOO_MANY_CHANGES
#undef DIFF_WHITESPACE_ONLY
#undef LENGTH_MASK
#undef LENGTH_MASK_SZ
#undef fsl_dline_empty_m
#undef MARKER
#undef DTCL_BUFFER
#undef blob_to_utf8_no_bom