/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=2 et sw=2 tw=80: */ /* ** Copyright (c) 2013 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the Simplified BSD License (also ** known as the "2-Clause License" or "FreeBSD License".) ** ** This program is distributed in the hope that it will be useful, ** but without any warranty; without even the implied warranty of ** merchantability or fitness for a particular purpose. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** This file houses Fossil's diff-generation routines (as opposed to ** the delta-generation). */ #include "fossil-scm/fossil.h" #include #include #include typedef fsl_uint64_t u64; typedef void ReCompiled /* porting crutch. i would strongly prefer to replace the regex support with a stateful predicate callback. */; #define TO_BE_STATIC /* Porting crutch for unused static funcs */ #define DIFF_CONTEXT_MASK ((u64)0x0000ffff) /* Lines of context. Default if 0 */ #define DIFF_WIDTH_MASK ((u64)0x00ff0000) /* side-by-side column width */ #define DIFF_IGNORE_EOLWS ((u64)0x01000000) /* Ignore end-of-line whitespace */ #define DIFF_SIDEBYSIDE ((u64)0x02000000) /* Generate a side-by-side diff */ #define DIFF_VERBOSE ((u64)0x04000000) /* Missing shown as empty files */ #define DIFF_BRIEF ((u64)0x08000000) /* Show filenames only */ #define DIFF_INLINE ((u64)0x00000000) /* Inline (not side-by-side) diff */ #define DIFF_HTML ((u64)0x10000000) /* Render for HTML */ #define DIFF_LINENO ((u64)0x20000000) /* Show line numbers */ #define DIFF_WS_WARNING ((u64)0x40000000) /* Warn about whitespace */ #define DIFF_NOOPT (((u64)0x01)<<32) /* Suppress optimizations (debug) */ #define DIFF_INVERT (((u64)0x02)<<32) /* Invert the diff (debug) */ #define DIFF_CONTEXT_EX (((u64)0x04)<<32) /* Use context even if zero */ #define DIFF_NOTTOOBIG (((u64)0x08)<<32) /* Only display if not too big */ /** ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) */ #define LENGTH_MASK_SZ 13 #define LENGTH_MASK ((1<h & LENGTH_MASK) /* ** Return an array of DLine objects containing a pointer to the ** start of each line and a hash of that line. The lower ** bits of the hash store the length of each line. ** ** Trailing whitespace is removed from each line. 2010-08-20: Not any ** more. If trailing whitespace is ignored, the "patch" command gets ** confused by the diff output. Ticket [a9f7b23c2e376af5b0e5b] ** ** Return 0 if the file is binary or contains a line that is ** too long. ** ** Profiling show that in most cases this routine consumes the bulk of ** the CPU time on a diff. */ static int break_into_lines(const char *z, int n, int *pnLine, DLine **pOut, int ignoreWS){ int nLine, i, j, k, x; unsigned int h, h2; DLine *a; /* Count the number of lines. Allocate space to hold ** the returned array. */ for(i=j=0, nLine=1; iLENGTH_MASK ){ return FSL_RC_RANGE; } j = 0; } } if( j>LENGTH_MASK ){ return FSL_RC_RANGE; } a = fsl_malloc( nLine*sizeof(a[0]) ); if(!a) return FSL_RC_OOM; memset(a, 0, nLine*sizeof(a[0]) ); if( n==0 ){ *pnLine = 0; *pOut = a; return 0; } /* Fill in the array */ for(i=0; i0 && fsl_isspace(z[k-1]) ){ k--; } for(h=0, x=0; x<=k; x++){ h = h ^ (h<<2) ^ z[x]; } a[i].h = h = (h<h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; } /* ** A context for running a raw diff. ** ** The aEdit[] array describes the raw diff. Each triple of integers in ** aEdit[] means: ** ** (1) COPY: Number of lines aFrom and aTo have in common ** (2) DELETE: Number of lines found only in aFrom ** (3) INSERT: Number of lines found only in aTo ** ** The triples repeat until all lines of both aFrom and aTo are accounted ** for. */ typedef struct DContext DContext; struct DContext { int *aEdit; /* Array of copy/delete/insert triples */ int nEdit; /* Number of integers (3x num of triples) in aEdit[] */ int nEditAlloc; /* Space allocated for aEdit[] */ DLine *aFrom; /* File on left side of the diff */ int nFrom; /* Number of lines in aFrom[] */ DLine *aTo; /* File on right side of the diff */ int nTo; /* Number of lines in aTo[] */ }; /** ** Holds output state for diff generation. */ struct DiffOutState { /** Output callback. */ fsl_output_f out; /** State for this->out(). */ void * oState; /** For propagating output errors. */ int rc; }; typedef struct DiffOutState DiffOutState; static const DiffOutState DiffOutState_empty = {NULL, NULL, 0}; /** ** Internal helper. Sends src to o->out(). If n is negative, fsl_strlen() ** is used to determine the length. */ static int diff_out( DiffOutState * o, void const * src, fsl_int_t n ){ return o->rc = n ? o->out(o->oState, src, (n<0)?fsl_strlen((char const *)src):(fsl_size_t)n) : 0; } /** ** fsl_appendf_f() impl for use with diff_outf(). state must be a ** (DiffOutState*). */ static fsl_int_t fsl_appendf_f_diff_out( void * state, char const * s, fsl_int_t n ){ DiffOutState * os = (DiffOutState *)state; diff_out(os, s, n); return os->rc ? -1 : n; } static int diff_outf( DiffOutState * o, char const * fmt, ... ){ va_list va; va_start(va,fmt); fsl_appendfv(fsl_appendf_f_diff_out, o, fmt, va); va_end(va); return o->rc; } /* ** Append a single line of context-diff output to pOut. */ static int appendDiffLine( DiffOutState *pOut, /* Where to write the line of output */ char cPrefix, /* One of " ", "+", or "-" */ DLine *pLine, /* The line to be output */ int html, /* True if generating HTML. False for plain text */ ReCompiled *pRe /* Colorize only if line matches this Regex */ ){ int rc; rc = diff_out(pOut, &cPrefix, 1); if(rc) return rc; else if( html ){ /* char *zHtml; */ if( pRe /*MISSING: && re_dline_match(pRe, pLine, 1)==0 */ ){ cPrefix = ' '; }else if( cPrefix=='+' ){ rc = diff_out(pOut, "", -1); }else if( cPrefix=='-' ){ rc = diff_out(pOut, "", -1); } if(!rc){ rc = pOut->rc = fsl_htmlize(pOut->out, pOut->oState, pLine->z, (pLine->h & LENGTH_MASK)); if( !rc && cPrefix!=' ' ){ rc = diff_out(pOut, "", -1); } } }else{ rc = diff_out(pOut, pLine->z, pLine->h & LENGTH_MASK); } if(!rc) rc = diff_out(pOut, "\n", 1); return rc; } /* ** Add two line numbers to the beginning of an output line for a context ** diff. One or the other of the two numbers might be zero, which means ** to leave that number field blank. The "html" parameter means to format ** the output for HTML. */ static int appendDiffLineno(DiffOutState *pOut, int lnA, int lnB, int html){ int rc = 0; if( html ){ rc = diff_out(pOut, "", -1); } if(!rc){ if( lnA>0 ){ rc = diff_outf(pOut, "%6d ", lnA); }else{ rc = diff_out(pOut, " ", 7); } } if(!rc){ if( lnB>0 ){ rc = diff_outf(pOut, "%6d ", lnB); }else{ rc = diff_out(pOut, " ", 8); } if( !rc && html ){ rc = diff_out(pOut, "", -1); } } return rc; } /* ** Minimum of two values */ #define minInt(a,b) (((a)<(b)) ? (a) : (b)) /** ** Compute the optimal longest common subsequence (LCS) using an ** exhaustive search. This version of the LCS is only used for ** shorter input strings since runtime is O(N*N) where N is the ** input string length. */ static void optimalLCS( DContext *p, /* Two files being compared */ int iS1, int iE1, /* Range of lines in p->aFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ){ int mxLength = 0; /* Length of longest common subsequence */ int i, j; /* Loop counters */ int k; /* Length of a candidate subsequence */ int iSXb = iS1; /* Best match so far */ int iSYb = iS2; /* Best match so far */ for(i=iS1; iaFrom[i], &p->aTo[j]) ) continue; if( mxLength && !same_dline(&p->aFrom[i+mxLength], &p->aTo[j+mxLength]) ){ continue; } k = 1; while( i+kaFrom[i+k],&p->aTo[j+k]) ){ k++; } if( k>mxLength ){ iSXb = i; iSYb = j; mxLength = k; } } } *piSX = iSXb; *piEX = iSXb + mxLength; *piSY = iSYb; *piEY = iSYb + mxLength; } /** ** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[] ** file and lines iS2 through iE2-1 of the aTo[] file. Locate a sequence ** of lines in these two blocks that are exactly the same. Return ** the bounds of the matching sequence. ** ** If there are two or more possible answers of the same length, the ** returned sequence should be the one closest to the center of the ** input range. ** ** Ideally, the common sequence should be the longest possible common ** sequence. However, an exact computation of LCS is O(N*N) which is ** way too slow for larger files. So this routine uses an O(N) ** heuristic approximation based on hashing that usually works about ** as well. But if the O(N) algorithm doesn't get a good solution ** and N is not too large, we fall back to an exact solution by ** calling optimalLCS(). */ static void longestCommonSequence( DContext *p, /* Two files being compared */ int iS1, int iE1, /* Range of lines in p->aFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ){ int i, j, k; /* Loop counters */ int n; /* Loop limit */ DLine *pA, *pB; /* Pointers to lines */ int iSX, iSY, iEX, iEY; /* Current match */ int skew = 0; /* How lopsided is the match */ int dist = 0; /* Distance of match from center */ int mid; /* Center of the span */ int iSXb, iSYb, iEXb, iEYb; /* Best match so far */ int iSXp, iSYp, iEXp, iEYp; /* Previous match */ sqlite3_int64 bestScore; /* Best score so far */ sqlite3_int64 score; /* Score for current candidate LCS */ int span; /* combined width of the input sequences */ span = (iE1 - iS1) + (iE2 - iS2); bestScore = -10000; score = 0; iSXb = iSXp = iS1; iEXb = iEXp = iS1; iSYb = iSYp = iS2; iEYb = iEYp = iS2; mid = (iE1 + iS1)/2; for(i=iS1; iaTo[p->aFrom[i].h % p->nTo].iHash; while( j>0 && (j-1=iE2 || !same_dline(&p->aFrom[i], &p->aTo[j-1])) ){ if( limit++ > 10 ){ j = 0; break; } j = p->aTo[j-1].iNext; } if( j==0 ) continue; assert( i>=iSXb && i>=iSXp ); if( i=iSYb && j=iSYp && jaFrom[iSX-1]; pB = &p->aTo[iSY-1]; n = minInt(iSX-iS1, iSY-iS2); for(k=0; kaFrom[iEX]; pB = &p->aTo[iEY]; n = minInt(iE1-iEX, iE2-iEY); for(k=0; kbestScore ){ bestScore = score; iSXb = iSX; iSYb = iSY; iEXb = iEX; iEYb = iEY; }else if( iEX>iEXp ){ iSXp = iSX; iSYp = iSY; iEXp = iEX; iEYp = iEY; } } if( iSXb==iEXb && (iE1-iS1)*(iE2-iS2)<400 ){ /* If no common sequence is found using the hashing heuristic and ** the input is not too big, use the expensive exact solution */ optimalLCS(p, iS1, iE1, iS2, iE2, piSX, piEX, piSY, piEY); }else{ *piSX = iSXb; *piSY = iSYb; *piEX = iEXb; *piEY = iEYb; } } /** ** Expand the size of p->aEdit array to hold at least nEdit elements. */ static int expandEdit(DContext *p, int nEdit){ void * re = fsl_realloc(p->aEdit, nEdit*sizeof(int)); if(!re) return FSL_RC_OOM; else{ p->aEdit = (int*)re; p->nEditAlloc = nEdit; return 0; } } /** ** Append a new COPY/DELETE/INSERT triple. ** ** Returns 0 on success, FSL_RC_OOM on OOM. */ static int appendTriple(DContext *p, int nCopy, int nDel, int nIns){ /* printf("APPEND %d/%d/%d\n", nCopy, nDel, nIns); */ if( p->nEdit>=3 ){ if( p->aEdit[p->nEdit-1]==0 ){ if( p->aEdit[p->nEdit-2]==0 ){ p->aEdit[p->nEdit-3] += nCopy; p->aEdit[p->nEdit-2] += nDel; p->aEdit[p->nEdit-1] += nIns; return 0; } if( nCopy==0 ){ p->aEdit[p->nEdit-2] += nDel; p->aEdit[p->nEdit-1] += nIns; return 0; } } if( nCopy==0 && nDel==0 ){ p->aEdit[p->nEdit-1] += nIns; return 0; } } if( p->nEdit+3>p->nEditAlloc ){ int const rc = expandEdit(p, p->nEdit*2 + 15); if(rc) return rc; else if( p->aEdit==0 ) return 0; } p->aEdit[p->nEdit++] = nCopy; p->aEdit[p->nEdit++] = nDel; p->aEdit[p->nEdit++] = nIns; return 0; } /** ** Do a single step in the difference. Compute a sequence of ** copy/delete/insert steps that will convert lines iS1 through iE1-1 ** of the input into lines iS2 through iE2-1 of the output and write ** that sequence into the difference context. ** ** The algorithm is to find a block of common text near the middle of ** the two segments being diffed. Then recursively compute ** differences on the blocks before and after that common segment. ** Special cases apply if either input segment is empty or if the two ** segments have no text in common. */ static int diff_step(DContext *p, int iS1, int iE1, int iS2, int iE2){ int iSX, iEX, iSY, iEY; int rc = 0; if( iE1<=iS1 ){ /* The first segment is empty */ if( iE2>iS2 ){ rc = appendTriple(p, 0, 0, iE2-iS2); } return rc; } if( iE2<=iS2 ){ /* The second segment is empty */ return appendTriple(p, 0, iE1-iS1, 0); } /* Find the longest matching segment between the two sequences */ longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY); if( iEX>iSX ){ /* A common segment has been found. ** Recursively diff either side of the matching segment */ rc = diff_step(p, iS1, iSX, iS2, iSY); if( !rc){ if(iEX>iSX){ rc = appendTriple(p, iEX - iSX, 0, 0); } if(!rc) rc = diff_step(p, iEX, iE1, iEY, iE2); } }else{ /* The two segments have nothing in common. Delete the first then ** insert the second. */ rc = appendTriple(p, 0, iE1-iS1, iE2-iS2); } return rc; } /** ** Compute the differences between two files already loaded into ** the DContext structure. ** ** A divide and conquer technique is used. We look for a large ** block of common text that is in the middle of both files. Then ** compute the difference on those parts of the file before and ** after the common block. This technique is fast, but it does ** not necessarily generate the minimum difference set. On the ** other hand, we do not need a minimum difference set, only one ** that makes sense to human readers, which this algorithm does. ** ** Any common text at the beginning and end of the two files is ** removed before starting the divide-and-conquer algorithm. ** ** Returns 0 on succes, FSL_RC_OOM on an allocation error. */ static int diff_all(DContext *p){ int mnE, iS, iE1, iE2; int rc = 0; /* Carve off the common header and footer */ iE1 = p->nFrom; iE2 = p->nTo; while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){ iE1--; iE2--; } mnE = iE1aFrom[iS],&p->aTo[iS]); iS++){} /* do the difference */ if( iS>0 ){ rc = appendTriple(p, iS, 0, 0); if(rc) return rc; } rc = diff_step(p, iS, iE1, iS, iE2); if(rc) return rc; if( iE1nFrom ){ rc = appendTriple(p, p->nFrom - iE1, 0, 0); if(rc) return rc; } /* Terminate the COPY/DELETE/INSERT triples with three zeros */ rc = expandEdit(p, p->nEdit+3); if(rc) return rc; else if( p->aEdit ){ p->aEdit[p->nEdit++] = 0; p->aEdit[p->nEdit++] = 0; p->aEdit[p->nEdit++] = 0; } return rc; } /* ** Attempt to shift insertion or deletion blocks so that they begin and ** end on lines that are pure whitespace. In other words, try to transform ** this: ** ** int func1(int x){ ** return x*10; ** +} ** + ** +int func2(int x){ ** + return x*20; ** } ** ** int func3(int x){ ** return x/5; ** } ** ** Into one of these: ** ** int func1(int x){ int func1(int x){ ** return x*10; return x*10; ** } } ** + ** +int func2(int x){ +int func2(int x){ ** + return x*20; + return x*20; ** +} +} ** + ** int func3(int x){ int func3(int x){ ** return x/5; return x/5; ** } } */ static void diff_optimize(DContext *p){ int r; /* Index of current triple */ int lnFrom; /* Line number in p->aFrom */ int lnTo; /* Line number in p->aTo */ int cpy, del, ins; lnFrom = lnTo = 0; for(r=0; rnEdit; r += 3){ cpy = p->aEdit[r]; del = p->aEdit[r+1]; ins = p->aEdit[r+2]; lnFrom += cpy; lnTo += cpy; /* Shift insertions toward the beginning of the file */ while( cpy>0 && del==0 && ins>0 ){ DLine *pTop = &p->aFrom[lnFrom-1]; /* Line before start of insert */ DLine *pBtm = &p->aTo[lnTo+ins-1]; /* Last line inserted */ if( same_dline(pTop, pBtm)==0 ) break; if( LENGTH(pTop+1)+LENGTH(pBtm)<=LENGTH(pTop)+LENGTH(pBtm-1) ) break; lnFrom--; lnTo--; p->aEdit[r]--; p->aEdit[r+3]++; cpy--; } /* Shift insertions toward the end of the file */ while( r+3nEdit && p->aEdit[r+3]>0 && del==0 && ins>0 ){ DLine *pTop = &p->aTo[lnTo]; /* First line inserted */ DLine *pBtm = &p->aTo[lnTo+ins]; /* First line past end of insert */ if( same_dline(pTop, pBtm)==0 ) break; if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop+1)+LENGTH(pBtm) ) break; lnFrom++; lnTo++; p->aEdit[r]++; p->aEdit[r+3]--; cpy++; } /* Shift deletions toward the beginning of the file */ while( cpy>0 && del>0 && ins==0 ){ DLine *pTop = &p->aFrom[lnFrom-1]; /* Line before start of delete */ DLine *pBtm = &p->aFrom[lnFrom+del-1]; /* Last line deleted */ if( same_dline(pTop, pBtm)==0 ) break; if( LENGTH(pTop+1)+LENGTH(pBtm)<=LENGTH(pTop)+LENGTH(pBtm-1) ) break; lnFrom--; lnTo--; p->aEdit[r]--; p->aEdit[r+3]++; cpy--; } /* Shift deletions toward the end of the file */ while( r+3nEdit && p->aEdit[r+3]>0 && del>0 && ins==0 ){ DLine *pTop = &p->aFrom[lnFrom]; /* First line deleted */ DLine *pBtm = &p->aFrom[lnFrom+del]; /* First line past end of delete */ if( same_dline(pTop, pBtm)==0 ) break; if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop)+LENGTH(pBtm) ) break; lnFrom++; lnTo++; p->aEdit[r]++; p->aEdit[r+3]--; cpy++; } lnFrom += del; lnTo += ins; } } /* ** Given a raw diff p[] in which the p->aEdit[] array has been filled ** in, compute a context diff into pOut. */ static int contextDiff( DContext *p, /* The difference */ DiffOutState *pOut, /* Output a context diff to here */ ReCompiled *pRe, /* Only show changes that match this regex */ u64 diffFlags /* Flags controlling the diff format */ ){ DLine *A; /* Left side of the diff */ DLine *B; /* Right side of the diff */ int a = 0; /* Index of next line in A[] */ int b = 0; /* Index of next line in B[] */ int *R; /* Array of COPY/DELETE/INSERT triples */ int r; /* Index into R[] */ int nr; /* Number of COPY/DELETE/INSERT triples to process */ int mxr; /* Maximum value for r */ int na, nb; /* Number of lines shown from A and B */ int i, j; /* Loop counters */ int m; /* Number of lines to output */ int skip; /* Number of lines to skip */ static int nChunk = 0; /* Number of diff chunks seen so far */ int nContext; /* Number of lines of context */ int showLn; /* Show line numbers */ int html; /* Render as HTML */ int showDivider = 0; /* True to show the divider between diff blocks */ int rc = 0; nContext = diff_context_lines(diffFlags); showLn = (diffFlags & DIFF_LINENO)!=0; html = (diffFlags & DIFF_HTML)!=0; A = p->aFrom; B = p->aTo; R = p->aEdit; mxr = p->nEdit; while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } for(r=0; r0 && R[r+nr*3]nContext ){ na = nb = nContext; skip = R[r] - nContext; }else{ na = nb = R[r]; skip = 0; } for(i=0; inContext ){ na += nContext; nb += nContext; }else{ na += R[r+nr*3]; nb += R[r+nr*3]; } for(i=1; i%.80c\n", '.'); }else{ rc = diff_outf(pOut, "%.80c\n", '.'); } if( !rc && html ){ rc = diff_outf(pOut, "", nChunk); } }else{ if( html ) rc = diff_outf(pOut, ""); /* * If the patch changes an empty file or results in an empty file, * the block header must use 0,0 as position indicator and not 1,0. * Otherwise, patch would be confused and may reject the diff. */ if(!rc) rc = diff_outf(pOut,"@@ -%d,%d +%d,%d @@", na ? a+skip+1 : 0, na, nb ? b+skip+1 : 0, nb); if( !rc ){ if( html ) rc = diff_outf(pOut, ""); if(!rc) rc = diff_out(pOut, "\n", 1); } } if(rc) return rc; /* Show the initial common area */ a += skip; b += skip; m = R[r] - skip; for(j=0; !rc && jnContext ) m = nContext; for(j=0; !rc && j tag */ int iEnd; /* Write prior to character iEnd */ int iStart2; /* Write zStart2 prior to character iStart2 */ const char *zStart2; /* A tag */ int iEnd2; /* Write prior to character iEnd2 */ ReCompiled *pRe; /* Only colorize matching lines, if not NULL */ }; /* ** Column indices for SbsLine.apCols[] */ #define SBS_LNA 0 /* Left line number */ #define SBS_TXTA 1 /* Left text */ #define SBS_MKR 2 /* Middle separator column */ #define SBS_LNB 3 /* Right line number */ #define SBS_TXTB 4 /* Right text */ /* ** Append newlines to all columns. */ static int sbsWriteNewlines(SbsLine *p){ int i; int rc = 0; for( i=p->escHtml ? SBS_LNA : SBS_TXTB; !rc && i<=SBS_TXTB; i++ ){ rc = fsl_buffer_append(p->apCols[i], "\n", 1); } return rc; } /* ** Append n spaces to the column. */ static int sbsWriteSpace(SbsLine *p, int n, int col){ return fsl_buffer_appendf(p->apCols[col], "%*s", n, ""); } /* ** Write the text of pLine into column iCol of p. ** ** If outputting HTML, write the full line. Otherwise, only write the ** width characters. Translate tabs into spaces. Add newlines if col ** is SBS_TXTB. Translate HTML characters if escHtml is true. Pad the ** rendering to width bytes if col is SBS_TXTA and escHtml is false. ** ** This comment contains multibyte unicode characters (ü, Æ, ð) in order ** to test the ability of the diff code to handle such characters. */ static int sbsWriteText(SbsLine *p, DLine *pLine, int col){ fsl_buffer *pCol = p->apCols[col]; int rc = 0; int n = pLine->h & LENGTH_MASK; int i; /* Number of input characters consumed */ int k; /* Cursor position */ int needEndSpan = 0; const char *zIn = pLine->z; int w = p->width; int colorize = p->escHtml; #if 0 /* MISSING: re bits, but want to replace those with a predicate. */ if( colorize && p->pRe && re_dline_match(p->pRe, pLine, 1)==0 ){ colorize = 0; } #endif for(i=k=0; !rc && (p->escHtml || kiStart ){ int x = strlen(p->zStart); rc = fsl_buffer_append(pCol, p->zStart, x); if(rc) break; needEndSpan = 1; if( p->iStart2 ){ p->iStart = p->iStart2; p->zStart = p->zStart2; p->iStart2 = 0; } }else if( i==p->iEnd ){ rc = fsl_buffer_append(pCol, "", 7); if(rc) break; needEndSpan = 0; if( p->iEnd2 ){ p->iEnd = p->iEnd2; p->iEnd2 = 0; } } } if( c=='\t' && !p->escHtml ){ rc = fsl_buffer_append(pCol, " ", 1); while( !rc && (k&7)!=7 && (p->escHtml || kescHtml ){ rc = fsl_buffer_append(pCol, "<", 4); }else if( c=='&' && p->escHtml ){ rc = fsl_buffer_append(pCol, "&", 5); }else if( c=='>' && p->escHtml ){ rc = fsl_buffer_append(pCol, ">", 4); }else if( c=='"' && p->escHtml ){ rc = fsl_buffer_append(pCol, """, 6); }else{ rc = fsl_buffer_append(pCol, &zIn[i], 1); if( (c&0xc0)==0x80 ) k--; } } if( !rc && needEndSpan ){ rc = fsl_buffer_append(pCol, "", 7); } if(!rc){ if( col==SBS_TXTB ){ rc = sbsWriteNewlines(p); }else if( !p->escHtml ){ rc = sbsWriteSpace(p, w-k, SBS_TXTA); } } return rc; } /* ** Append a column to the final output blob. */ static int sbsWriteColumn(DiffOutState *pOut, fsl_buffer const *pCol, int col){ return diff_outf(pOut, "
\n" "
\n"
    "%b"
    "
\n" "
\n", col % 3 ? (col == SBS_MKR ? "mkr" : "txt") : "ln", pCol ); } /* ** Append a separator line to column iCol */ static int sbsWriteSep(SbsLine *p, int len, int col){ char ch = '.'; if( len<1 ){ len = 1; ch = ' '; } return fsl_buffer_appendf(p->apCols[col], "%.*c\n", len, ch); } /* ** Append the appropriate marker into the center column of the diff. */ static int sbsWriteMarker(SbsLine *p, const char *zTxt, const char *zHtml){ return fsl_buffer_append(p->apCols[SBS_MKR], p->escHtml ? zHtml : zTxt, -1); } /* ** Append a line number to the column. */ static int sbsWriteLineno(SbsLine *p, int ln, int col){ int rc; if( p->escHtml ){ rc = fsl_buffer_appendf(p->apCols[col], "%d", ln+1); }else{ char zLn[8]; fsl_snprintf(zLn, 8, "%5d ", ln+1); rc = fsl_buffer_appendf(p->apCols[col], "%s ", zLn); } return rc; } /* ** The two text segments zLeft and zRight are known to be different on ** both ends, but they might have a common segment in the middle. If ** they do not have a common segment, return 0. If they do have a large ** common segment, return 1 and before doing so set: ** ** aLCS[0] = start of the common segment in zLeft ** aLCS[1] = end of the common segment in zLeft ** aLCS[2] = start of the common segment in zLeft ** aLCS[3] = end of the common segment in zLeft ** ** This computation is for display purposes only and does not have to be ** optimal or exact. */ static int textLCS( const char *zLeft, int nA, /* String on the left */ const char *zRight, int nB, /* String on the right */ int *aLCS /* Identify bounds of LCS here */ ){ const unsigned char *zA = (const unsigned char*)zLeft; /* left string */ const unsigned char *zB = (const unsigned char*)zRight; /* right string */ int nt; /* Number of target points */ int ti[3]; /* Index for start of each 4-byte target */ unsigned int target[3]; /* 4-byte alignment targets */ unsigned int probe; /* probe to compare against target */ int iAS, iAE, iBS, iBE; /* Range of common segment */ int i, j; /* Loop counters */ int rc = 0; /* Result code. 1 for success */ if( nA<6 || nB<6 ) return 0; memset(aLCS, 0, sizeof(int)*4); ti[0] = i = nB/2-2; target[0] = (zB[i]<<24) | (zB[i+1]<<16) | (zB[i+2]<<8) | zB[i+3]; probe = 0; if( nB<16 ){ nt = 1; }else{ ti[1] = i = nB/4-2; target[1] = (zB[i]<<24) | (zB[i+1]<<16) | (zB[i+2]<<8) | zB[i+3]; ti[2] = i = (nB*3)/4-2; target[2] = (zB[i]<<24) | (zB[i+1]<<16) | (zB[i+2]<<8) | zB[i+3]; nt = 3; } probe = (zA[0]<<16) | (zA[1]<<8) | zA[2]; for(i=3; i0 && iBS>0 && zA[iAS-1]==zB[iBS-1] ){ iAS--; iBS--; } if( iAE-iAS > aLCS[1] - aLCS[0] ){ aLCS[0] = iAS; aLCS[1] = iAE; aLCS[2] = iBS; aLCS[3] = iBE; rc = 1; } } } } return rc; } /* ** Try to shift iStart as far as possible to the left. */ static void sbsShiftLeft(SbsLine *p, const char *z){ int i, j; while( (i=p->iStart)>0 && z[i-1]==z[i] ){ for(j=i+1; jiEnd && z[j-1]==z[j]; j++){} if( jiEnd ) break; p->iStart--; p->iEnd--; } } /* ** Simplify iStart and iStart2: ** ** * If iStart is a null-change then move iStart2 into iStart ** * Make sure any null-changes are in canonoical form. ** * Make sure all changes are at character boundaries for ** multi-byte characters. */ static void sbsSimplifyLine(SbsLine *p, const char *z){ if( p->iStart2==p->iEnd2 ){ p->iStart2 = p->iEnd2 = 0; }else if( p->iStart2 ){ while( p->iStart2>0 && (z[p->iStart2]&0xc0)==0x80 ) p->iStart2--; while( (z[p->iEnd2]&0xc0)==0x80 ) p->iEnd2++; } if( p->iStart==p->iEnd ){ p->iStart = p->iStart2; p->iEnd = p->iEnd2; p->zStart = p->zStart2; p->iStart2 = 0; p->iEnd2 = 0; } if( p->iStart==p->iEnd ){ p->iStart = p->iEnd = -1; }else if( p->iStart>0 ){ while( p->iStart>0 && (z[p->iStart]&0xc0)==0x80 ) p->iStart--; while( (z[p->iEnd]&0xc0)==0x80 ) p->iEnd++; } } /* ** Write out lines that have been edited. Adjust the highlight to cover ** only those parts of the line that actually changed. */ static int sbsWriteLineChange( SbsLine *p, /* The SBS output line */ DLine *pLeft, /* Left line of the change */ int lnLeft, /* Line number for the left line */ DLine *pRight, /* Right line of the change */ int lnRight /* Line number of the right line */ ){ int rc = 0; int nLeft; /* Length of left line in bytes */ int nRight; /* Length of right line in bytes */ int nShort; /* Shortest of left and right */ int nPrefix; /* Length of common prefix */ int nSuffix; /* Length of common suffix */ const char *zLeft; /* Text of the left line */ const char *zRight; /* Text of the right line */ int nLeftDiff; /* nLeft - nPrefix - nSuffix */ int nRightDiff; /* nRight - nPrefix - nSuffix */ int aLCS[4]; /* Bounds of common middle segment */ static const char zClassRm[] = ""; static const char zClassAdd[] = ""; static const char zClassChng[] = ""; nLeft = pLeft->h & LENGTH_MASK; zLeft = pLeft->z; nRight = pRight->h & LENGTH_MASK; zRight = pRight->z; nShort = nLeft0 && (zLeft[nPrefix]&0xc0)==0x80 ) nPrefix--; } nSuffix = 0; if( nPrefix0 && (zLeft[nLeft-nSuffix]&0xc0)==0x80 ) nSuffix--; } if( nSuffix==nLeft || nSuffix==nRight ) nPrefix = 0; } if( nPrefix+nSuffix > nShort ) nPrefix = nShort - nSuffix; /* A single chunk of text inserted on the right */ if( nPrefix+nSuffix==nLeft ){ rc = sbsWriteLineno(p, lnLeft, SBS_LNA); if(rc) return rc; p->iStart2 = p->iEnd2 = 0; p->iStart = p->iEnd = -1; rc = sbsWriteText(p, pLeft, SBS_TXTA); if( !rc && nLeft==nRight && zLeft[nLeft]==zRight[nRight] ){ rc = sbsWriteMarker(p, " ", ""); }else{ rc = sbsWriteMarker(p, " | ", "|"); } if(!rc){ rc = sbsWriteLineno(p, lnRight, SBS_LNB); if(!rc){ p->iStart = nPrefix; p->iEnd = nRight - nSuffix; p->zStart = zClassAdd; rc = sbsWriteText(p, pRight, SBS_TXTB); } } return rc; } /* A single chunk of text deleted from the left */ if( nPrefix+nSuffix==nRight ){ /* Text deleted from the left */ rc = sbsWriteLineno(p, lnLeft, SBS_LNA); if(!rc){ p->iStart2 = p->iEnd2 = 0; p->iStart = nPrefix; p->iEnd = nLeft - nSuffix; p->zStart = zClassRm; rc = sbsWriteText(p, pLeft, SBS_TXTA); if(!rc){ rc = sbsWriteMarker(p, " | ", "|"); if(!rc){ rc = sbsWriteLineno(p, lnRight, SBS_LNB); if(!rc){ p->iStart = p->iEnd = -1; sbsWriteText(p, pRight, SBS_TXTB); } } } } return rc; } /* At this point we know that there is a chunk of text that has ** changed between the left and the right. Check to see if there ** is a large unchanged section in the middle of that changed block. */ nLeftDiff = nLeft - nSuffix - nPrefix; nRightDiff = nRight - nSuffix - nPrefix; if( p->escHtml && nLeftDiff >= 6 && nRightDiff >= 6 && textLCS(&zLeft[nPrefix], nLeftDiff, &zRight[nPrefix], nRightDiff, aLCS) ){ rc = sbsWriteLineno(p, lnLeft, SBS_LNA); if(rc) return rc; p->iStart = nPrefix; p->iEnd = nPrefix + aLCS[0]; if( aLCS[2]==0 ){ sbsShiftLeft(p, pLeft->z); p->zStart = zClassRm; }else{ p->zStart = zClassChng; } p->iStart2 = nPrefix + aLCS[1]; p->iEnd2 = nLeft - nSuffix; p->zStart2 = aLCS[3]==nRightDiff ? zClassRm : zClassChng; sbsSimplifyLine(p, zLeft+nPrefix); rc = sbsWriteText(p, pLeft, SBS_TXTA); if(!rc) rc = sbsWriteMarker(p, " | ", "|"); if(!rc) rc = sbsWriteLineno(p, lnRight, SBS_LNB); if(rc) return rc; p->iStart = nPrefix; p->iEnd = nPrefix + aLCS[2]; if( aLCS[0]==0 ){ sbsShiftLeft(p, pRight->z); p->zStart = zClassAdd; }else{ p->zStart = zClassChng; } p->iStart2 = nPrefix + aLCS[3]; p->iEnd2 = nRight - nSuffix; p->zStart2 = aLCS[1]==nLeftDiff ? zClassAdd : zClassChng; sbsSimplifyLine(p, zRight+nPrefix); rc = sbsWriteText(p, pRight, SBS_TXTB); return rc; } /* If all else fails, show a single big change between left and right */ rc = sbsWriteLineno(p, lnLeft, SBS_LNA); if(!rc){ p->iStart2 = p->iEnd2 = 0; p->iStart = nPrefix; p->iEnd = nLeft - nSuffix; p->zStart = zClassChng; rc = sbsWriteText(p, pLeft, SBS_TXTA); if(!rc){ rc = sbsWriteMarker(p, " | ", "|"); if(!rc){ rc = sbsWriteLineno(p, lnRight, SBS_LNB); if(!rc){ p->iEnd = nRight - nSuffix; sbsWriteText(p, pRight, SBS_TXTB); } } } } return rc; } /* ** Return the number between 0 and 100 that is smaller the closer pA and ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are ** completely different. ** ** The current algorithm is as follows: ** ** (1) Remove leading and trailing whitespace. ** (2) Truncate both strings to at most 250 characters ** (3) Find the length of the longest common subsequence ** (4) Longer common subsequences yield lower scores. */ static int match_dline(DLine *pA, DLine *pB){ const char *zA; /* Left string */ const char *zB; /* right string */ int nA; /* Bytes in zA[] */ int nB; /* Bytes in zB[] */ int avg; /* Average length of A and B */ int i, j, k; /* Loop counters */ int best = 0; /* Longest match found so far */ int score; /* Final score. 0..100 */ unsigned char c; /* Character being examined */ unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */ unsigned char aNext[252]; /* aNext[i] = index in zB[] of next zB[i] char */ zA = pA->z; zB = pB->z; nA = pA->h & LENGTH_MASK; nB = pB->h & LENGTH_MASK; while( nA>0 && fsl_isspace(zA[0]) ){ nA--; zA++; } while( nA>0 && fsl_isspace(zA[nA-1]) ){ nA--; } while( nB>0 && fsl_isspace(zB[0]) ){ nB--; zB++; } while( nB>0 && fsl_isspace(zB[nB-1]) ){ nB--; } if( nA>250 ) nA = 250; if( nB>250 ) nB = 250; avg = (nA+nB)/2; if( avg==0 ) return 0; if( nA==nB && memcmp(zA, zB, nA)==0 ) return 0; memset(aFirst, 0, sizeof(aFirst)); zA--; zB--; /* Make both zA[] and zB[] 1-indexed */ for(i=nB; i>0; i--){ c = (unsigned char)zB[i]; aNext[i] = aFirst[c]; aFirst[c] = i; } best = 0; for(i=1; i<=nA-best; i++){ c = (unsigned char)zA[i]; for(j=aFirst[c]; j>0 && jbest ) best = k; } } score = (best>avg) ? 0 : (avg - best)*100/avg; #if 0 fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n", nA, zA+1, nB, zB+1, best, avg, score); #endif /* Return the result */ return score; } /* ** There is a change block in which nLeft lines of text on the left are ** converted into nRight lines of text on the right. This routine computes ** how the lines on the left line up with the lines on the right. ** ** The return value is a buffer of unsigned characters, obtained from ** fsl_malloc(). (The caller needs to free the return value using ** fsl_free().) Entries in the returned array have values as follows: ** ** 1. Delete the next line of pLeft. ** 2. Insert the next line of pRight. ** 3. The next line of pLeft changes into the next line of pRight. ** 4. Delete one line from pLeft and add one line to pRight. ** ** Values larger than three indicate better matches. ** ** The length of the returned array will be just large enough to cause ** all elements of pLeft and pRight to be consumed. ** ** Algorithm: Wagner's minimum edit-distance algorithm, modified by ** adding a cost to each match based on how well the two rows match ** each other. Insertion and deletion costs are 50. Match costs ** are between 0 and 100 where 0 is a perfect match 100 is a complete ** mismatch. */ static unsigned char *sbsAlignment( DLine *aLeft, int nLeft, /* Text on the left */ DLine *aRight, int nRight /* Text on the right */ ){ int i, j, k; /* Loop counters */ int *a; /* One row of the Wagner matrix */ int *pToFree; /* Space that needs to be freed */ unsigned char *aM; /* Wagner result matrix */ int nMatch, iMatch; /* Number of matching lines and match score */ int mnLen; /* MIN(nLeft, nRight) */ int mxLen; /* MAX(nLeft, nRight) */ int aBuf[100]; /* Stack space for a[] if nRight not to big */ aM = (unsigned char *)fsl_malloc( (nLeft+1)*(nRight+1) ); if(!aM) return NULL; if( nLeft==0 ){ memset(aM, 2, nRight); return aM; } if( nRight==0 ){ memset(aM, 1, nLeft); return aM; } /* This algorithm is O(N**2). So if N is too big, bail out with a ** simple (but stupid and ugly) result that doesn't take too long. */ mnLen = nLeft100000 ){ memset(aM, 4, mnLen); if( nLeft>mnLen ) memset(aM+mnLen, 1, nLeft-mnLen); if( nRight>mnLen ) memset(aM+mnLen, 2, nRight-mnLen); return aM; } if( nRight < (sizeof(aBuf)/sizeof(aBuf[0]))-1 ){ pToFree = 0; a = aBuf; }else{ a = pToFree = fsl_malloc( sizeof(a[0])*(nRight+1) ); if(!a){ fsl_free(aM); return NULL; } } /* Compute the best alignment */ for(i=0; i<=nRight; i++){ aM[i] = 2; a[i] = i*50; } aM[0] = 0; for(j=1; j<=nLeft; j++){ int p = a[0]; a[0] = p+50; aM[j*(nRight+1)] = 1; for(i=1; i<=nRight; i++){ int m = a[i-1]+50; int d = 2; if( m>a[i]+50 ){ m = a[i]+50; d = 1; } if( m>p ){ int score = match_dline(&aLeft[j-1], &aRight[i-1]); if( (score<=63 || (ij-1)) && m>p+score ){ m = p+score; d = 3 | score*4; } } p = a[i]; a[i] = m; aM[j*(nRight+1)+i] = d; } } /* Compute the lowest-cost path back through the matrix */ i = nRight; j = nLeft; k = (nRight+1)*(nLeft+1)-1; nMatch = iMatch = 0; while( i+j>0 ){ unsigned char c = aM[k]; if( c>=3 ){ assert( i>0 && j>0 ); i--; j--; nMatch++; iMatch += (c>>2); aM[k] = 3; }else if( c==2 ){ assert( i>0 ); i--; }else{ assert( j>0 ); j--; } k--; aM[k] = aM[j*(nRight+1)+i]; } k++; i = (nRight+1)*(nLeft+1) - k; memmove(aM, &aM[k], i); /* If: ** (1) the alignment is more than 25% longer than the longest side, and ** (2) the average match cost exceeds 15 ** Then this is probably an alignment that will be difficult for humans ** to read. So instead, just show all of the right side inserted followed ** by all of the left side deleted. ** ** The coefficients for conditions (1) and (2) above are determined by ** experimentation. */ mxLen = nLeft>nRight ? nLeft : nRight; if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){ memset(aM, 4, mnLen); if( nLeft>mnLen ) memset(aM+mnLen, 1, nLeft-mnLen); if( nRight>mnLen ) memset(aM+mnLen, 2, nRight-mnLen); } /* Return the result */ fsl_free(pToFree); return aM; } /* ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a ** pair of adjacent differences. Return true if the gap between these ** two differences is so small that they should be rendered as a single ** edit. */ static int smallGap(int *R){ return R[3]<=2 || R[3]<=(R[1]+R[2]+R[4]+R[5])/8; } /* ** Given a diff context in which the aEdit[] array has been filled ** in, compute a side-by-side diff into pOut. */ static int sbsDiff( DContext *p, /* The computed diff */ DiffOutState *pOut, /* Write the results here */ ReCompiled *pRe, /* Only show changes that match this regex */ u64 diffFlags /* Flags controlling the diff */ ){ DLine *A; /* Left side of the diff */ DLine *B; /* Right side of the diff */ int rc = 0; int a = 0; /* Index of next line in A[] */ int b = 0; /* Index of next line in B[] */ int *R; /* Array of COPY/DELETE/INSERT triples */ int r; /* Index into R[] */ int nr; /* Number of COPY/DELETE/INSERT triples to process */ int mxr; /* Maximum value for r */ int na, nb; /* Number of lines shown from A and B */ int i, j; /* Loop counters */ int m, ma, mb;/* Number of lines to output */ int skip; /* Number of lines to skip */ static int nChunk = 0; /* Number of chunks of diff output seen so far */ SbsLine s; /* Output line buffer */ int nContext; /* Lines of context above and below each change */ int showDivider = 0; /* True to show the divider */ fsl_buffer unesc = fsl_buffer_empty; fsl_buffer aCols[5] = { /* Array of column blobs */ fsl_buffer_empty_m, fsl_buffer_empty_m, fsl_buffer_empty_m, fsl_buffer_empty_m, fsl_buffer_empty_m }; memset(&s, 0, sizeof(s)); s.width = diff_width(diffFlags); nContext = diff_context_lines(diffFlags); s.escHtml = (diffFlags & DIFF_HTML)!=0; if( s.escHtml ){ for(i=SBS_LNA; i<=SBS_TXTB; i++){ s.apCols[i] = &aCols[i]; } }else{ for(i=SBS_LNA; i<=SBS_TXTB; i++){ s.apCols[i] = &unesc; } } s.pRe = pRe; s.iStart = -1; s.iStart2 = 0; s.iEnd = -1; A = p->aFrom; B = p->aTo; R = p->aEdit; mxr = p->nEdit; while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } for(r=0; r0 && R[r+nr*3]nContext ){ na = nb = nContext; skip = R[r] - nContext; }else{ na = nb = R[r]; skip = 0; } for(i=0; inContext ){ na += nContext; nb += nContext; }else{ na += R[r+nr*3]; nb += R[r+nr*3]; } for(i=1; i", nChunk); } /* Show the initial common area */ a += skip; b += skip; m = R[r] - skip; for(j=0; !rc && j0; j++){ if( alignment[j]==1 ){ /* Delete one line from the left */ rc = sbsWriteLineno(&s, a, SBS_LNA); if(rc) goto end_align; s.iStart = 0; s.zStart = ""; s.iEnd = LENGTH(&A[a]); rc = sbsWriteText(&s, &A[a], SBS_TXTA); if(!rc) rc = sbsWriteMarker(&s, " <", "<"); if(!rc) rc = sbsWriteNewlines(&s); if(rc) goto end_align; assert( ma>0 ); ma--; a++; }else if( alignment[j]==3 ){ /* The left line is changed into the right line */ rc = sbsWriteLineChange(&s, &A[a], a, &B[b], b); if(rc) goto end_align; assert( ma>0 && mb>0 ); ma--; mb--; a++; b++; }else if( alignment[j]==2 ){ /* Insert one line on the right */ if( !s.escHtml ){ rc = sbsWriteSpace(&s, s.width + 7, SBS_TXTA); } if(!rc) rc = sbsWriteMarker(&s, " > ", ">"); if(!rc) rc = sbsWriteLineno(&s, b, SBS_LNB); if(rc) goto end_align; s.iStart = 0; s.zStart = ""; s.iEnd = LENGTH(&B[b]); rc = sbsWriteText(&s, &B[b], SBS_TXTB); if(rc) goto end_align; assert( mb>0 ); mb--; b++; }else{ /* Delete from the left and insert on the right */ rc = sbsWriteLineno(&s, a, SBS_LNA); if(rc) goto end_align; s.iStart = 0; s.zStart = ""; s.iEnd = LENGTH(&A[a]); rc = sbsWriteText(&s, &A[a], SBS_TXTA); if(!rc) rc = sbsWriteMarker(&s, " | ", "|"); if(!rc) rc = sbsWriteLineno(&s, b, SBS_LNB); if(rc) goto end_align; s.iStart = 0; s.zStart = ""; s.iEnd = LENGTH(&B[b]); rc = sbsWriteText(&s, &B[b], SBS_TXTB); if(rc) goto end_align; ma--; mb--; a++; b++; } } end_align: fsl_free(alignment); if(rc) goto end; if( inContext ) m = nContext; for(j=0; !rc && jused>0) ){ rc = diff_out(pOut, "\n", -1); for(i=SBS_LNA; !rc && i<=SBS_TXTB; i++){ rc = sbsWriteColumn(pOut, s.apCols[i], i); } rc = diff_out(pOut, "
\n", -1); }else if(unesc.used){ rc = pOut->out(pOut->oState, unesc.mem, unesc.used); } end: for( i = 0; i < sizeof(aCols)/sizeof(aCols[0]); ++i ){ fsl_buffer_clear(&aCols[i]); } fsl_buffer_clear(&unesc); return rc; } static int fsl_diff_text_impl( fsl_buffer const *pA, /* FROM file */ fsl_buffer const *pB, /* TO file */ fsl_output_f out, void * outState, /* ReCompiled *pRe, */ /* Only output changes where this Regexp matches */ short contextLines, short sbsWidth, int diffFlags_, /* FSL_DIFF_* flags */ int ** outRaw ){ int ignoreEolWs; /* Ignore whitespace at the end of lines */ int rc; DContext c; DiffOutState dos = DiffOutState_empty; fsl_uint64_t diffFlags = fsl_diff_flags_convert(diffFlags_); if(!pA || !pB || (!out && !outRaw)) return FSL_RC_MISUSE; else if(contextLines<=0) contextLines = 5; else if(contextLines & ~LENGTH_MASK) contextLines = (int)LENGTH_MASK; diffFlags |= (LENGTH_MASK & contextLines); /* Encode SBS width... */ if(sbsWidth<0 || ((DIFF_SIDEBYSIDE & diffFlags) && !sbsWidth) ) sbsWidth = 80; if(sbsWidth) diffFlags |= DIFF_SIDEBYSIDE; diffFlags |= ((int)(sbsWidth & 0xFF))<<16; dos.out = out; dos.oState = outState; if( diffFlags & DIFF_INVERT ){ fsl_buffer const *pTemp = pA; pA = pB; pB = pTemp; } ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0; /* Prepare the input files */ memset(&c, 0, sizeof(c)); rc = break_into_lines(fsl_buffer_cstr(pA), fsl_buffer_size(pA), &c.nFrom, &c.aFrom, ignoreEolWs); if(rc) goto end; rc = break_into_lines(fsl_buffer_cstr(pB), fsl_buffer_size(pB), &c.nTo, &c.aTo, ignoreEolWs); if(rc) goto end; if( c.aFrom==0 || c.aTo==0 ){ /* Empty diff */ goto end; } /* Compute the difference */ rc = diff_all(&c); if(rc) goto end; if( (diffFlags & DIFF_NOTTOOBIG)!=0 ){ int i, m, n; int *a = c.aEdit; int mx = c.nEdit; for(i=m=n=0; i10000 ){ rc = FSL_RC_RANGE; /* diff_errmsg(pOut, DIFF_TOO_MANY_CHANGES, diffFlags); */ goto end;; } } if( (diffFlags & DIFF_NOOPT)==0 ){ diff_optimize(&c); } if( out ){ /* Compute a context or side-by-side diff into pOut */ /* MISSING: */ if( diffFlags & DIFF_SIDEBYSIDE ){ rc = sbsDiff(&c, &dos, NULL/*pRe*/, diffFlags); }else{ rc = contextDiff(&c, &dos, NULL/*pRe*/, diffFlags); } }else if(outRaw){ /* If a context diff is not requested, then return the ** array of COPY/DELETE/INSERT triples. */ *outRaw = c.aEdit; c.aEdit = NULL; } end: fsl_free(c.aFrom); fsl_free(c.aTo); fsl_free(c.aEdit); return rc; } int fsl_diff_text(fsl_buffer const *pA, fsl_buffer const *pB, fsl_output_f out, void * outState, short contextLines, short sbsWidth, int diffFlags ){ return fsl_diff_text_impl(pA, pB, out, outState, contextLines, sbsWidth, diffFlags, NULL ); } int fsl_diff_text_to_buffer(fsl_buffer const *pA, fsl_buffer const *pB, fsl_buffer * pOut, short contextLines, short sbsWidth, int diffFlags ){ return (pA && pB && pOut) ? fsl_diff_text_impl(pA, pB, fsl_output_f_buffer, pOut, contextLines, sbsWidth, diffFlags, NULL ) : FSL_RC_MISUSE; } #undef TO_BE_STATIC #undef LENGTH_MASK #undef LENGTH_MASK_SZ #undef LENGTH #undef DIFF_CONTEXT_MASK #undef DIFF_WIDTH_MASK #undef DIFF_IGNORE_EOLWS #undef DIFF_SIDEBYSIDE #undef DIFF_VERBOSE #undef DIFF_BRIEF #undef DIFF_INLINE #undef DIFF_HTML #undef DIFF_LINENO #undef DIFF_WS_WARNING #undef DIFF_NOOPT #undef DIFF_INVERT #undef DIFF_CONTEXT_EX #undef DIFF_NOTTOOBIG #undef minInt