/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=2 et sw=2 tw=80: */ /* Copyright 2013-2021 The Libfossil Authors, see LICENSES/BSD-2-Clause.txt SPDX-License-Identifier: BSD-2-Clause-FreeBSD SPDX-FileCopyrightText: 2021 The Libfossil Authors SPDX-ArtifactOfProjectName: Libfossil SPDX-FileType: Code Heavily indebted to the Fossil SCM project (https://fossil-scm.org). */ /** This file houses the "2nd generation" diff-generation APIs. This code is a straight port of those algorithms from the Fossil SCM project, initially implemented by D. Richard Hipp, ported and the license re-assigned to this project with this consent. */ #include "fossil-scm/fossil.h" #include #include #include #include /* for memmove()/strlen() */ #include #define MARKER(pfexp) \ do{ printf("MARKER: %s:%d:%s():\t",__FILE__,__LINE__,__func__); \ printf pfexp; \ } while(0) const fsl_diff_opt fsl_diff_opt_empty = fsl_diff_opt_empty_m; const fsl_diff_builder fsl_diff_builder_empty = fsl_diff_builder_empty_m; const fsl_dline fsl_dline_empty = fsl_dline_empty_m; const fsl_dline_change fsl_dline_change_empty = fsl_dline_change_empty_m; const fsl__diff_cx fsl__diff_cx_empty = fsl__diff_cx_empty_m; void fsl__diff_cx_clean(fsl__diff_cx * const cx){ fsl_free(cx->aFrom); fsl_free(cx->aTo); fsl_free(cx->aEdit); cx->aFrom = cx->aTo = NULL; cx->aEdit = NULL; *cx = fsl__diff_cx_empty; } /** Counts the number of lines in the first n bytes of the given string. If n<0 then fsl_strlen() is used to count it. It includes the last line in the count even if it lacks the \n terminator. If an empty string is passed in, the number of lines is zero. For the purposes of this function, a string is considered empty if it contains no characters OR contains only NUL characters. If the input appears to be plain text it returns true and, if nOut is not NULL, writes the number of lines there. If the input appears to be binary, returns false and does not modify nOut. */ FSL_EXPORT bool fsl_count_lines(const char *z, fsl_int_t n, uint32_t * nOut ); bool fsl_count_lines(const char *z, fsl_int_t n, uint32_t * nOut ){ uint32_t nLine; const char *zNL, *z2; if(n<0) n = (fsl_int_t)fsl_strlen(z); for(nLine=0, z2=z; (zNL = strchr(z2,'\n'))!=0; z2=zNL+1, nLine++){} if( z2[0]!='\0' ){ nLine++; do{ z2++; }while( z2[0]!='\0' ); } if( n!=(fsl_int_t)(z2-z) ) return false; if( nOut ) *nOut = nLine; return true; } int fsl_break_into_dlines(const char *z, fsl_int_t n, uint32_t *pnLine, fsl_dline **pOut, uint64_t diffFlags){ uint32_t nLine, i, k, nn, s, x; uint64_t h, h2; fsl_dline *a = 0; const char *zNL; if(!z || !n){ *pnLine = 0; *pOut = NULL; return 0; } if( !fsl_count_lines(z, n, &nLine) ){ return FSL_RC_DIFF_BINARY; } assert( nLine>0 || z[0]=='\0' ); if(nLine>0){ a = fsl_malloc( sizeof(a[0])*nLine ); if(!a) return FSL_RC_OOM; memset(a, 0, sizeof(a[0])*nLine); }else{ *pnLine = 0; *pOut = a; return 0; } assert( a ); i = 0; do{ zNL = strchr(z,'\n'); if( zNL==0 ) zNL = z+n; nn = (uint32_t)(zNL - z); if( nn>FSL__LINE_LENGTH_MASK ){ fsl_free(a); *pOut = 0; *pnLine = 0; return FSL_RC_DIFF_BINARY; } a[i].z = z; k = nn; if( diffFlags & FSL_DIFF2_STRIP_EOLCR ){ if( k>0 && z[k-1]=='\r' ){ k--; } } a[i].n = k; s = 0; if( diffFlags & FSL_DIFF2_IGNORE_EOLWS ){ while( k>0 && fsl_isspace(z[k-1]) ){ k--; } } if( (diffFlags & FSL_DIFF2_IGNORE_ALLWS) ==FSL_DIFF2_IGNORE_ALLWS ){ uint32_t numws = 0; while( sh!=pB->h ) return 1; return memcmp(pA->z,pB->z, pA->h&FSL__LINE_LENGTH_MASK); } int fsl_dline_cmp_ignore_ws(const fsl_dline * const pA, const fsl_dline * const pB){ unsigned short a = pA->indent, b = pB->indent; if( pA->h==pB->h ){ while( an || bn ){ if( an && bn && pA->z[a++] != pB->z[b++] ) return 1; while( an && fsl_isspace(pA->z[a])) ++a; while( bn && fsl_isspace(pB->z[b])) ++b; } return pA->n-a != pB->n-b; } return 1; } /* ** The two text segments zLeft and zRight are known to be different on ** both ends, but they might have a common segment in the middle. If ** they do not have a common segment, return 0. If they do have a large ** common segment, return non-0 and before doing so set: ** ** aLCS[0] = start of the common segment in zLeft ** aLCS[1] = end of the common segment in zLeft ** aLCS[2] = start of the common segment in zLeft ** aLCS[3] = end of the common segment in zLeft ** ** This computation is for display purposes only and does not have to be ** optimal or exact. */ static int textLCS2( const char *zLeft, uint32_t nA, /* String on the left */ const char *zRight, uint32_t nB, /* String on the right */ uint32_t *aLCS /* Identify bounds of LCS here */ ){ const unsigned char *zA = (const unsigned char*)zLeft; /* left string */ const unsigned char *zB = (const unsigned char*)zRight; /* right string */ uint32_t i, j, k; /* Loop counters */ uint32_t lenBest = 0; /* Match length to beat */ for(i=0; ilenBest ){ lenBest = k; aLCS[0] = i; aLCS[1] = i+k; aLCS[2] = j; aLCS[3] = j+k; } } } } return lenBest>0; } /* ** Find the smallest spans that are different between two text strings ** that are known to be different on both ends. Returns the number ** of entries in p->a which get populated. */ static unsigned short textLineChanges( const char *zLeft, uint32_t nA, /* String on the left */ const char *zRight, uint32_t nB, /* String on the right */ fsl_dline_change * const p /* Write results here */ ){ p->n = 1; p->a[0].iStart1 = 0; p->a[0].iLen1 = nA; p->a[0].iStart2 = 0; p->a[0].iLen2 = nB; p->a[0].isMin = 0; while( p->nn; i++){ if( p->a[i].isMin ) continue; x = p->a[i].iLen1; if( p->a[i].iLen2a[i].iLen2; if( x>mxLen ){ mxLen = x; mxi = i; } } if( mxLen<6 ) break; x = textLCS2(zLeft + p->a[mxi].iStart1, p->a[mxi].iLen1, zRight + p->a[mxi].iStart2, p->a[mxi].iLen2, aLCS); if( x==0 ){ p->a[mxi].isMin = 1; continue; } a = p->a+mxi; b = a+1; if( mxin-1 ){ memmove(b+1, b, sizeof(*b)*(p->n-mxi-1)); } p->n++; b->iStart1 = a->iStart1 + aLCS[1]; b->iLen1 = a->iLen1 - aLCS[1]; a->iLen1 = aLCS[0]; b->iStart2 = a->iStart2 + aLCS[3]; b->iLen2 = a->iLen2 - aLCS[3]; a->iLen2 = aLCS[2]; b->isMin = 0; } return p->n; } /* ** Return true if the string starts with n spaces */ static int allSpaces(const char *z, int n){ int i; for(i=0; in<1 ) return; /* (1) Attempt to move indentation changes to the left margin */ if( p->a[0].iLen1==0 && (len = p->a[0].iLen2)>0 && (j = p->a[0].iStart2)>0 && zB[0]==zB[j] && allSpaces(zB, j) ){ for(n=1; na[1], &p->a[0], sizeof(p->a[0])*p->n); p->n++; p->a[0] = p->a[1]; p->a[1].iStart2 += n; p->a[1].iLen2 -= n; p->a[0].iLen2 = n; } p->a[0].iStart1 = 0; p->a[0].iStart2 = 0; }else if( p->a[0].iLen2==0 && (len = p->a[0].iLen1)>0 && (j = p->a[0].iStart1)>0 && zA[0]==zA[j] && allSpaces(zA, j) ){ for(n=1; na[1], &p->a[0], sizeof(p->a[0])*p->n); p->n++; p->a[0] = p->a[1]; p->a[1].iStart1 += n; p->a[1].iLen1 -= n; p->a[0].iLen1 = n; } p->a[0].iStart1 = 0; p->a[0].iStart2 = 0; } /* (2) Try to shift changes so that they begin or end with a ** space. (TBD) */ } void fsl_dline_change_spans(const fsl_dline *pLeft, const fsl_dline *pRight, fsl_dline_change * const p){ /* fossil(1) counterpart ==> diff.c oneLineChange() */ int nLeft; /* Length of left line in bytes */ int nRight; /* Length of right line in bytes */ int nShort; /* Shortest of left and right */ int nPrefix; /* Length of common prefix */ int nSuffix; /* Length of common suffix */ int nCommon; /* Total byte length of suffix and prefix */ const char *zLeft; /* Text of the left line */ const char *zRight; /* Text of the right line */ int nLeftDiff; /* nLeft - nPrefix - nSuffix */ int nRightDiff; /* nRight - nPrefix - nSuffix */ nLeft = pLeft->n; zLeft = pLeft->z; nRight = pRight->n; zRight = pRight->z; nShort = nLeft0 && (zLeft[nPrefix]&0xc0)==0x80 ) nPrefix--; } nSuffix = 0; if( nPrefix0 && (zLeft[nLeft-nSuffix]&0xc0)==0x80 ) nSuffix--; } if( nSuffix==nLeft || nSuffix==nRight ) nPrefix = 0; } nCommon = nPrefix + nSuffix; /* If the prefix and suffix overlap, that means that we are dealing with ** a pure insertion or deletion of text that can have multiple alignments. ** Try to find an alignment to begins and ends on whitespace, or on ** punctuation, rather than in the middle of a name or number. */ if( nCommon > nShort ){ int iBest = -1; int iBestVal = -1; int i; int nLong = nLeftiBestVal ){ iBestVal = iVal; iBest = i; } } nPrefix = iBest; nSuffix = nShort - nPrefix; nCommon = nPrefix + nSuffix; } /* A single chunk of text inserted */ if( nCommon==nLeft ){ p->n = 1; p->a[0].iStart1 = nPrefix; p->a[0].iLen1 = 0; p->a[0].iStart2 = nPrefix; p->a[0].iLen2 = nRight - nCommon; improveReadability(zLeft, zRight, p); return; } /* A single chunk of text deleted */ if( nCommon==nRight ){ p->n = 1; p->a[0].iStart1 = nPrefix; p->a[0].iLen1 = nLeft - nCommon; p->a[0].iStart2 = nPrefix; p->a[0].iLen2 = 0; improveReadability(zLeft, zRight, p); return; } /* At this point we know that there is a chunk of text that has ** changed between the left and the right. Check to see if there ** is a large unchanged section in the middle of that changed block. */ nLeftDiff = nLeft - nCommon; nRightDiff = nRight - nCommon; if( nLeftDiff >= 4 && nRightDiff >= 4 && textLineChanges(&zLeft[nPrefix], nLeftDiff, &zRight[nPrefix], nRightDiff, p)>1 ){ int i; for(i=0; in; i++){ p->a[i].iStart1 += nPrefix; p->a[i].iStart2 += nPrefix; } improveReadability(zLeft, zRight, p); return; } /* If all else fails, show a single big change between left and right */ p->n = 1; p->a[0].iStart1 = nPrefix; p->a[0].iLen1 = nLeft - nCommon; p->a[0].iStart2 = nPrefix; p->a[0].iLen2 = nRight - nCommon; improveReadability(zLeft, zRight, p); } /* ** The threshold at which diffBlockAlignment transitions from the ** O(N*N) Wagner minimum-edit-distance algorithm to a less process ** O(NlogN) divide-and-conquer approach. */ #define DIFF_ALIGN_MX 1225 /* ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a ** pair of adjacent differences. Return true if the gap between these ** two differences is so small that they should be rendered as a single ** edit. */ static int smallGap2(const int *R, int ma, int mb){ int m = R[3]; ma += R[4] + m; mb += R[5] + m; if( ma*mb>DIFF_ALIGN_MX ) return 0; return m<=2 || m<=(R[1]+R[2]+R[4]+R[5])/8; } static unsigned short diff_opt_context_lines(fsl_diff_opt const * opt){ const unsigned short dflt = 5; unsigned short n = opt ? opt->contextLines : dflt; if( !n && (opt->diffFlags & FSL_DIFF2_CONTEXT_ZERO)==0 ){ n = dflt; } return n; } /* ** Minimum of two values */ static int diffMin(int a, int b){ return az; zB = pB->z; nA = pA->n; nB = pB->n; while( nA>0 && (unsigned char)zA[0]<=' ' ){ nA--; zA++; } while( nA>0 && (unsigned char)zA[nA-1]<=' ' ){ nA--; } while( nB>0 && (unsigned char)zB[0]<=' ' ){ nB--; zB++; } while( nB>0 && (unsigned char)zB[nB-1]<=' ' ){ nB--; } if( nA>250 ) nA = 250; if( nB>250 ) nB = 250; avg = (nA+nB)/2; if( avg==0 ) return 0; nMin = nA; if( nB5 && nPrefix>nMin/2 ){ best = nPrefix*3/2; if( best>=avg - 2 ) best = avg - 2; } if( nA==nB && memcmp(zA, zB, nA)==0 ) return 0; memset(aFirst, 0xff, sizeof(aFirst)); zA--; zB--; /* Make both zA[] and zB[] 1-indexed */ for(i=nB; i>0; i--){ c = (unsigned char)zB[i]; aNext[i] = aFirst[c]; aFirst[c] = i; } for(i=1; i<=nA-best; i++){ c = (unsigned char)zA[i]; for(j=aFirst[c]; jbest ) best = k; } } score = (best>=avg) ? 0 : (avg - best)*100/avg; #if 0 fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n", nA, zA+1, nB, zB+1, best, avg, score); #endif /* Return the result */ return score; } /* ** There is a change block in which nLeft lines of text on the left are ** converted into nRight lines of text on the right. This routine computes ** how the lines on the left line up with the lines on the right. ** ** The return value is a buffer of unsigned characters, obtained from ** fossil_malloc(). (The caller needs to free the return value using ** fossil_free().) Entries in the returned array have values as follows: ** ** 1. Delete the next line of pLeft. ** 2. Insert the next line of pRight. ** 3. The next line of pLeft changes into the next line of pRight. ** 4. Delete one line from pLeft and add one line to pRight. ** ** The length of the returned array will be at most nLeft+nRight bytes. ** If the first bytes is 4, that means we could not compute reasonable ** alignment between the two blocks. ** ** Algorithm: Wagner's minimum edit-distance algorithm, modified by ** adding a cost to each match based on how well the two rows match ** each other. Insertion and deletion costs are 50. Match costs ** are between 0 and 100 where 0 is a perfect match 100 is a complete ** mismatch. */ static int diffBlockAlignment( const fsl_dline *aLeft, int nLeft, /* Text on the left */ const fsl_dline *aRight, int nRight, /* Text on the right */ fsl_diff_opt *pOpt, /* Configuration options */ unsigned char **pResult, /* Raw result */ unsigned *pNResult /* OUTPUT: length of result */ ){ int i, j, k; /* Loop counters */ int *a = 0; /* One row of the Wagner matrix */ int *pToFree = 0; /* Space that needs to be freed */ unsigned char *aM = 0; /* Wagner result matrix */ int nMatch, iMatch; /* Number of matching lines and match score */ int aBuf[100]; /* Stack space for a[] if nRight not to big */ int rc = 0; if( nLeft==0 ){ aM = fsl_malloc( nRight + 2 ); if(!aM) return FSL_RC_OOM; memset(aM, 2, nRight); *pNResult = nRight; *pResult = aM; return 0; } if( nRight==0 ){ aM = fsl_malloc( nLeft + 2 ); if(!aM) return FSL_RC_OOM; memset(aM, 1, nLeft); *pNResult = nLeft; *pResult = aM; return 0; } /* For large alignments, use a divide and conquer algorithm that is ** O(NlogN). The result is not as precise, but this whole thing is an ** approximation anyhow, and the faster response time is an acceptable ** trade-off for reduced precision. */ if( nLeft*nRight>DIFF_ALIGN_MX && (pOpt->diffFlags & FSL_DIFF2_SLOW_SBS)==0 ){ const fsl_dline *aSmall; /* The smaller of aLeft and aRight */ const fsl_dline *aBig; /* The larger of aLeft and aRight */ int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */ unsigned char *a1 = 0, *a2 = 0; /* Results of the alignments on two halves */ unsigned int n1, n2; /* Number of entries in a1 and a2 */ int score, bestScore; /* Score and best score seen so far */ if( nLeft>nRight ){ aSmall = aRight; nSmall = nRight; aBig = aLeft; nBig = nLeft; }else{ aSmall = aLeft; nSmall = nLeft; aBig = aRight; nBig = nRight; } iDivBig = nBig/2; iDivSmall = nSmall/2; bestScore = 10000; for(i=0; ia[i]+50 ){ m = a[i]+50; d = 1; } if( m>p ){ int const score = match_dline2(&aLeft[j-1], &aRight[i-1]); if( (score<=90 || (ij-1)) && m>p+score ){ m = p+score; d = 3 | score*4; } } p = a[i]; a[i] = m; aM[j*(nRight+1)+i] = d; } } /* Compute the lowest-cost path back through the matrix */ i = nRight; j = nLeft; k = (nRight+1)*(nLeft+1)-1; nMatch = iMatch = 0; while( i+j>0 ){ unsigned char c = aM[k]; if( c>=3 ){ assert( i>0 && j>0 ); i--; j--; nMatch++; iMatch += (c>>2); aM[k] = 3; }else if( c==2 ){ assert( i>0 ); i--; }else{ assert( j>0 ); j--; } k--; aM[k] = aM[j*(nRight+1)+i]; } k++; i = (nRight+1)*(nLeft+1) - k; memmove(aM, &aM[k], i); *pNResult = i; *pResult = aM; end: fsl_free(pToFree); return rc; } /* ** Format a diff using a fsl_diff_builder object */ static int fdb__format( fsl__diff_cx * const cx, fsl_diff_builder * const pBuilder ){ const fsl_dline *A; /* Left side of the diff */ const fsl_dline *B; /* Right side of the diff */ fsl_diff_opt * const pOpt = pBuilder->opt; const int *R; /* Array of COPY/DELETE/INSERT triples */ unsigned int a; /* Index of next line in A[] */ unsigned int b; /* Index of next line in B[] */ unsigned int r; /* Index into R[] */ unsigned int nr; /* Number of COPY/DELETE/INSERT triples to process */ unsigned int mxr; /* Maximum value for r */ unsigned int na, nb; /* Number of lines shown from A and B */ unsigned int i, j; /* Loop counters */ unsigned int m, ma, mb;/* Number of lines to output */ signed int skip; /* Number of lines to skip */ unsigned int contextLines; /* Lines of context above and below each change */ unsigned short passNumber = 0; int rc = 0; if( (na = 0) ){ /* clang-13 -Werror, Wunused-but-set-variable That's a clang bug: na is used below on the LHS of a ternary if. */ } #define RC if(rc) goto end pass_again: contextLines = diff_opt_context_lines(pOpt); skip = 0; a = b = 0; A = cx->aFrom; B = cx->aTo; R = cx->aEdit; mxr = cx->nEdit; //MARKER(("contextLines=%u, nEdit = %d, mxr=%u\n", contextLines, cx->nEdit, mxr)); while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } pBuilder->lnLHS = pBuilder->lnRHS = 0; if(pBuilder->start){ pBuilder->passNumber = ++passNumber; rc = pBuilder->start(pBuilder); RC; } for(r=0; r0 && R[r+nr*3]<(int)contextLines*2; nr++){} #if 0 /* MISSING: this "should" be replaced by a stateful predicate function, probably in the fsl_diff_opt class. */ /* If there is a regex, skip this block (generate no diff output) ** if the regex matches or does not match both insert and delete. ** Only display the block if one side matches but the other side does ** not. */ if( pOpt->pRe ){ int hideBlock = 1; int xa = a, xb = b; for(i=0; hideBlock && ipRe, &A[xa], R[r+i*3+1]); c2 = re_dline_match(pOpt->pRe, &B[xb], R[r+i*3+2]); hideBlock = c1==c2; xa += R[r+i*3+1]; xb += R[r+i*3+2]; } if( hideBlock ){ a = xa; b = xb; continue; } } #endif /* Figure out how many lines of A and B are to be displayed ** for this change block. */ if( R[r]>(int)contextLines ){ na = nb = contextLines; skip = R[r] - contextLines; }else{ na = nb = R[r]; skip = 0; } for(i=0; i(int)contextLines ){ na += contextLines; nb += contextLines; }else{ na += R[r+nr*3]; nb += R[r+nr*3]; } for(i=1; ichunkHeader #if 0 /* The following two bits are a kludge to keep from injecting a chunk header between chunks which are directly adjacent */ && pBuilder->lnLHS != (uint32_t)(na ? a+skip+1 : a+skip)-1 && pBuilder->lnRHS != (uint32_t)(nb ? b+skip+1 : b+skip)-1 #endif ){ rc = pBuilder->chunkHeader(pBuilder, (uint32_t)(na ? a+skip+1 : a+skip), (uint32_t)na, (uint32_t)(nb ? b+skip+1 : b+skip), (uint32_t)nb); RC; } /* Show the initial common area */ a += skip; b += skip; m = R[r] - skip; if( r ) skip -= contextLines; //MARKER(("Show the initial common... a=%u, b=%u, m=%u, r=%u, skip=%d\n", a, b, m, r, skip)); if( skip>0 ){ if( NULL==pBuilder->chunkHeader && skip<(int)contextLines ){ /* 2021-09-27: BUG: this is incompatible with unified diff format. The generated header lines say we're skipping X lines but we then end up including lines which that header says to skip. As a workaround, we'll only run this when pBuilder->chunkHeader is NULL, noting that fossil's diff builder interface does not have that method. Without this block, our "utxt" diff builder can mimic fossil's non-diff builder unified diff format, except that we add Index lines (feature or bug?). With this block, the header values output above are wrong. */ /* If the amount to skip is less that the context band, then ** go ahead and show the skip band as it is not worth eliding */ //MARKER(("skip %d < contextLines %d\n", skip, contextLines)); /* from fossil(1) from formatDiff() */ for(j=0; 0==rc && j<(unsigned)skip; j++){ //MARKER(("(A) COMMON\n")); rc = pBuilder->common(pBuilder, &A[a+j-skip]); } }else{ rc = pBuilder->skip(pBuilder, skip); } RC; } for(j=0; 0==rc && jcommon(pBuilder, &A[a+j]); } RC; a += m; b += m; //MARKER(("Show the differences... a=%d, b=%d, m=%d\n", a, b, m)); /* Show the differences */ for(i=0; i0; j++){ assert( jdeletion(pBuilder, &A[a]); if(rc) goto bail; ma--; a++; break; } case 2: { /* Insert one line on the right */ rc = pBuilder->insertion(pBuilder, &B[b]); if(rc) goto bail; assert( mb>0 ); mb--; b++; break; } case 3: { /* The left line is changed into the right line */ if( cx->cmpLine(&A[a], &B[b])==0 ){ rc = pBuilder->common(pBuilder, &A[a]); }else{ rc = pBuilder->edit(pBuilder, &A[a], &B[b]); } if(rc) goto bail; assert( ma>0 && mb>0 ); ma--; mb--; a++; b++; break; } case 4: { /* Delete from left then separately insert on the right */ rc = pBuilder->replacement(pBuilder, &A[a], &B[b]); if(rc) goto bail; ma--; a++; mb--; b++; break; } } } assert( nAlign==j ); fsl_free(alignment); if( icommon(pBuilder, &A[a+j]); } RC; b += m; a += m; } continue; bail: assert(rc); fsl_free(alignment); goto end; } /* Show the final common area */ assert( nr==i ); m = R[r+nr*3]; if( m>contextLines ) m = contextLines; for(j=0; 0==rc && jcommon(pBuilder, &A[a+j]); } RC; } if( R[r]>(int)contextLines ){ rc = pBuilder->skip(pBuilder, R[r] - contextLines); } end: #undef RC if(0==rc){ if(pBuilder->finish) pBuilder->finish(pBuilder); if(pBuilder->twoPass && 1==passNumber){ goto pass_again; } } return rc; } /* MISSING(?) fossil(1) converts the diff inputs into utf8 with no BOM. Whether we really want to do that here or rely on the caller to is up for debate. If we do it here, we have to make the inputs non-const, which seems "wrong" for a library API. */ #define blob_to_utf8_no_bom(A,B) (void)0 /** Performs a diff of version 1 (pA) and version 2 (pB). ONE of pBuilder or outRaw must be non-NULL. If pBuilder is not NULL, all output for the diff is emitted via pBuilder. If outRaw is not NULL then on success *outRaw is set to the array of diff triples, transfering ownership to the caller, who must eventually fsl_free() it. On error, *outRaw is not modified but pBuilder may have emitted partial output. That is not knowable for the general case. Ownership of pBuilder is not changed. If pBuilder is not NULL then pBuilder->opt must be non-NULL. */ static int fsl_diff2_text_impl(fsl_buffer const *pA, fsl_buffer const *pB, fsl_diff_builder * const pBuilder, fsl_diff_opt const * const opt_, int ** outRaw){ int rc = 0; fsl__diff_cx c = fsl__diff_cx_empty; bool ignoreWs = false; int ansiOptCount = 0; fsl_diff_opt opt = *opt_ /*we need a copy for the sake of the FSL_DIFF2_INVERT flag*/; if(!pA || !pB || (pBuilder && outRaw)) return FSL_RC_MISUSE; blob_to_utf8_no_bom(pA, 0); blob_to_utf8_no_bom(pB, 0); if( opt.diffFlags & FSL_DIFF2_INVERT ){ char const * z; fsl_buffer const *pTemp = pA; pA = pB; pB = pTemp; z = opt.hashRHS; opt.hashRHS = opt.hashLHS; opt.hashLHS = z; z = opt.nameRHS; opt.nameRHS = opt.nameLHS; opt.nameLHS = z; } #define AOPT(OPT) \ if(opt.ansiColor.OPT) ansiOptCount += (*opt.ansiColor.OPT) ? 1 : 0; \ else opt.ansiColor.OPT = "" AOPT(insertion); AOPT(edit); AOPT(deletion); #undef AOPT if(0==ansiOptCount){ opt.ansiColor.reset = ""; }else if(!opt.ansiColor.reset || !*opt.ansiColor.reset){ opt.ansiColor.reset = "\x1b[0m"; } ignoreWs = (opt.diffFlags & FSL_DIFF2_IGNORE_ALLWS)!=0; if(FSL_DIFF2_IGNORE_ALLWS==(opt.diffFlags & FSL_DIFF2_IGNORE_ALLWS)){ c.cmpLine = fsl_dline_cmp_ignore_ws; }else{ c.cmpLine = fsl_dline_cmp; } rc = fsl_break_into_dlines(fsl_buffer_cstr(pA), (fsl_int_t)pA->used, (uint32_t*)&c.nFrom, &c.aFrom, opt.diffFlags); if(rc) goto end; rc = fsl_break_into_dlines(fsl_buffer_cstr(pB), (fsl_int_t)pB->used, (uint32_t*)&c.nTo, &c.aTo, opt.diffFlags); if(rc) goto end; /* Compute the difference */ rc = fsl__diff_all(&c); if(rc) goto end; if( ignoreWs && c.nEdit==6 && c.aEdit[1]==0 && c.aEdit[2]==0 ){ rc = FSL_RC_DIFF_WS_ONLY; goto end; } if( (opt.diffFlags & FSL_DIFF2_NOTTOOBIG)!=0 ){ int i, m, n; int const * const a = c.aEdit; int const mx = c.nEdit; for(i=m=n=0; i10000 ){ rc = FSL_RC_RANGE; /* diff_errmsg(pOut, DIFF_TOO_MANY_CHANGES, diffFlags); */ goto end; } } //fsl__dump_triples(&c, __FILE__, __LINE__); if( (opt.diffFlags & FSL_DIFF2_NOOPT)==0 ){ fsl__diff_optimize(&c); } //fsl__dump_triples(&c, __FILE__, __LINE__); /** Reference: https://fossil-scm.org/home/file?ci=cae7036bb7f07c1b&name=src/diff.c&ln=2749-2804 Noting that: - That function's return value is this one's *outRaw - DIFF_NUMSTAT flag is not implemented. For that matter, flags which result in output going anywhere except for pBuilder->out are not implemented here, e.g. DIFF_RAW. That last point makes this impl tiny compared to the original! */ if(pBuilder){ fsl_diff_opt * const oldOpt = pBuilder->opt; pBuilder->opt = &opt; rc = fdb__format(&c, pBuilder); pBuilder->opt = oldOpt; } end: if(0==rc && outRaw){ *outRaw = c.aEdit; c.aEdit = 0; } fsl__diff_cx_clean(&c); return rc; } int fsl_diff_v2(fsl_buffer const * pv1, fsl_buffer const * pv2, fsl_diff_builder * const pBuilder){ return fsl_diff2_text_impl(pv1, pv2, pBuilder, pBuilder->opt, NULL); } int fsl_diff_v2_raw(fsl_buffer const * pv1, fsl_buffer const * pv2, fsl_diff_opt const * const opt, int **outRaw ){ return fsl_diff2_text_impl(pv1, pv2, NULL, opt, outRaw); } /** Allocator for fsl_diff_builder instances. If extra is >0 then that much extra space is allocated as part of the same block and the pimpl member of the returned object is pointed to that space. */ static fsl_diff_builder * fsl__diff_builder_alloc(fsl_size_t extra){ fsl_diff_builder * rc = (fsl_diff_builder*)fsl_malloc(sizeof(fsl_diff_builder) + extra); if(rc){ *rc = fsl_diff_builder_empty; if(extra){ rc->pimpl = ((unsigned char *)rc)+sizeof(fsl_diff_builder); } } return rc; } static int fdb__out(fsl_diff_builder *const b, char const *z, fsl_size_t n){ return b->opt->out(b->opt->outState, z, n); } static int fdb__outf(fsl_diff_builder * const b, char const *fmt, ...){ int rc = 0; va_list va; assert(b->opt->out); va_start(va,fmt); rc = fsl_appendfv(b->opt->out, b->opt->outState, fmt, va); va_end(va); return rc; } static int fdb__debug_start(fsl_diff_builder * const b){ int rc = fdb__outf(b, "DEBUG builder starting up.\n"); if(0==rc && b->opt->nameLHS){ rc = fdb__outf(b,"LHS: %s\n", b->opt->nameLHS); } if(0==rc && b->opt->nameRHS){ rc = fdb__outf(b,"RHS: %s\n", b->opt->nameRHS); } if(0==rc && b->opt->hashLHS){ rc = fdb__outf(b,"LHS hash: %s\n", b->opt->hashLHS); } if(0==rc && b->opt->hashRHS){ rc = fdb__outf(b,"RHS hash: %s\n", b->opt->hashRHS); } return rc; } static int fdb__debug_chunkHeader(fsl_diff_builder* const b, uint32_t lnnoLHS, uint32_t linesLHS, uint32_t lnnoRHS, uint32_t linesRHS ){ #if 1 return fdb__outf(b, "@@ -%" PRIu32 ",%" PRIu32 " +%" PRIu32 ",%" PRIu32 " @@\n", lnnoLHS, linesLHS, lnnoRHS, linesRHS); #else return 0; #endif } static int fdb__debug_skip(fsl_diff_builder * const p, uint32_t n){ const int rc = fdb__outf(p, "SKIP %u (%u..%u left and %u..%u right)\n", n, p->lnLHS+1, p->lnLHS+n, p->lnRHS+1, p->lnRHS+n); p->lnLHS += n; p->lnRHS += n; return rc; } static int fdb__debug_common(fsl_diff_builder * const p, fsl_dline const * pLine){ ++p->lnLHS; ++p->lnRHS; return fdb__outf(p, "COMMON %8u %8u %.*s\n", p->lnLHS, p->lnRHS, (int)pLine->n, pLine->z); } static int fdb__debug_insertion(fsl_diff_builder * const p, fsl_dline const * pLine){ p->lnRHS++; return fdb__outf(p, "INSERT %8u %.*s\n", p->lnRHS, (int)pLine->n, pLine->z); } static int fdb__debug_deletion(fsl_diff_builder * const p, fsl_dline const * pLine){ p->lnLHS++; return fdb__outf(p, "DELETE %8u %.*s\n", p->lnLHS, (int)pLine->n, pLine->z); } static int fdb__debug_replacement(fsl_diff_builder * const p, fsl_dline const * lineLhs, fsl_dline const * lineRhs) { int rc; p->lnLHS++; p->lnRHS++; rc = fdb__outf(p, "REPLACE %8u %.*s\n", p->lnLHS, (int)lineLhs->n, lineLhs->z); if(!rc){ rc = fdb__outf(p, " %8u %.*s\n", p->lnRHS, (int)lineRhs->n, lineRhs->z); } return rc; } static int fdb__debug_edit(fsl_diff_builder * const b, fsl_dline const * pX, fsl_dline const * pY){ int rc = 0; int i, j; int x; fsl_dline_change chng = fsl_dline_change_empty; #define RC if(rc) goto end b->lnLHS++; b->lnRHS++; rc = fdb__outf(b, "EDIT %8u %.*s\n", b->lnLHS, (int)pX->n, pX->z); RC; fsl_dline_change_spans(pX, pY, &chng); for(i=x=0; i x ){ if( (pX->z[x]&0xc0)!=0x80 ){ rc = fdb__out(b, " ", 1); RC; } x++; } for(j=0; jz[x]&0xc0)!=0x80 ){ rc = fdb__out(b, &c, 1); RC; } } } } if( x ){ rc = fdb__out(b, "\n", 1); RC; } rc = fdb__outf(b, " %8u %.*s\n", b->lnRHS, (int)pY->n, pY->z); RC; for(i=x=0; i x ){ if( (pY->z[x]&0xc0)!=0x80 ){ rc = fdb__out(b, " ", 1); RC; } x++; } for(j=0; jz[x]&0xc0)!=0x80 ){ rc = fdb__out(b, &c, 1); RC; } } } } if( x ){ rc = fdb__out(b, "\n", 1); } end: #undef RC return rc; } static int fdb__debug_finish(fsl_diff_builder * const b){ return fdb__outf(b, "END with %u lines left and %u lines right\n", b->lnLHS, b->lnRHS); } static void fdb__generic_finalize(fsl_diff_builder * const b){ fsl_free(b); } static fsl_diff_builder * fsl__diff_builder_debug(void){ fsl_diff_builder * rc = fsl__diff_builder_alloc(0); if(rc){ rc->chunkHeader = fdb__debug_chunkHeader; rc->start = fdb__debug_start; rc->skip = fdb__debug_skip; rc->common = fdb__debug_common; rc->insertion = fdb__debug_insertion; rc->deletion = fdb__debug_deletion; rc->replacement = fdb__debug_replacement; rc->edit = fdb__debug_edit; rc->finish = fdb__debug_finish; rc->finalize = fdb__generic_finalize; assert(!rc->pimpl); assert(0==rc->implFlags); assert(0==rc->lnLHS); assert(0==rc->lnRHS); assert(NULL==rc->opt); } return rc; } /******************** json1 diff builder ********************/ /* Description taken verbatim from fossil(1): */ /* ** This formatter generates a JSON array that describes the difference. ** ** The Json array consists of integer opcodes with each opcode followed ** by zero or more arguments: ** ** Syntax Mnemonic Description ** ----------- -------- -------------------------- ** 0 END This is the end of the diff ** 1 INTEGER SKIP Skip N lines from both files ** 2 STRING COMMON The line show by STRING is in both files ** 3 STRING INSERT The line STRING is in only the right file ** 4 STRING DELETE The STRING line is in only the left file ** 5 SUBARRAY EDIT One line is different on left and right. ** ** The SUBARRAY is an array of 3*N+1 strings with N>=0. The triples ** represent common-text, left-text, and right-text. The last string ** in SUBARRAY is the common-suffix. Any string can be empty if it does ** not apply. */ static int fdb__outj(fsl_diff_builder * const b, char const *zJson, int n){ return n<0 ? fdb__outf(b, "%!j", zJson) : fdb__outf(b, "%!.*j", n, zJson); } static int fdb__json1_start(fsl_diff_builder * const b){ int rc = fdb__outf(b, "{\"hashLHS\": %!j, \"hashRHS\": %!j, ", b->opt->hashLHS, b->opt->hashRHS); if(0==rc && b->opt->nameLHS){ rc = fdb__outf(b, "\"nameLHS\": %!j, ", b->opt->nameLHS); } if(0==rc && b->opt->nameRHS){ rc = fdb__outf(b, "\"nameRHS\": %!j, ", b->opt->nameRHS); } if(0==rc){ rc = fdb__out(b, "\"diff\":[", 8); } return rc; } static int fdb__json1_skip(fsl_diff_builder * const b, uint32_t n){ return fdb__outf(b, "1,%" PRIu32 ",\n", n); } static int fdb__json1_common(fsl_diff_builder * const b, fsl_dline const * pLine){ int rc = fdb__out(b, "2,",2); if(!rc) { rc = fdb__outj(b, pLine->z, (int)pLine->n); if(!rc) rc = fdb__out(b, ",\n",2); } return rc; } static int fdb__json1_insertion(fsl_diff_builder * const b, fsl_dline const * pLine){ int rc = fdb__out(b, "3,",2); if(!rc){ rc = fdb__outj(b, pLine->z, (int)pLine->n); if(!rc) rc = fdb__out(b, ",\n",2); } return rc; } static int fdb__json1_deletion(fsl_diff_builder * const b, fsl_dline const * pLine){ int rc = fdb__out(b, "4,",2); if(!rc){ rc = fdb__outj(b, pLine->z, (int)pLine->n); if(!rc) rc = fdb__out(b, ",\n",2); } return rc; } static int fdb__json1_replacement(fsl_diff_builder * const b, fsl_dline const * lineLhs, fsl_dline const * lineRhs) { int rc = fdb__out(b, "5,[\"\",",6); if(!rc) rc = fdb__outf(b,"%!.*j", (int)lineLhs->n, lineLhs->z); if(!rc) rc = fdb__out(b, ",", 1); if(!rc) rc = fdb__outf(b,"%!.*j", (int)lineRhs->n, lineRhs->z); if(!rc) rc = fdb__out(b, ",\"\"],\n",6); return rc; } static int fdb__json1_edit(fsl_diff_builder * const b, fsl_dline const * pX, fsl_dline const * pY){ int rc = 0; int i,x; fsl_dline_change chng = fsl_dline_change_empty; #define RC if(rc) goto end rc = fdb__out(b, "5,[", 3); RC; fsl_dline_change_spans(pX, pY, &chng); for(i=x=0; i<(int)chng.n; i++){ rc = fdb__outj(b, pX->z + x, (int)chng.a[i].iStart1 - x); RC; x = chng.a[i].iStart1; rc = fdb__out(b, ",", 1); RC; rc = fdb__outj(b, pX->z + x, (int)chng.a[i].iLen1); RC; x += chng.a[i].iLen1; rc = fdb__out(b, ",", 1); RC; rc = fdb__outj(b, pY->z + chng.a[i].iStart2, (int)chng.a[i].iLen2); RC; } rc = fdb__out(b, ",", 1); RC; rc = fdb__outj(b, pX->z + x, (int)(pX->n - x)); RC; rc = fdb__out(b, "],\n",3); RC; end: return rc; #undef RC } static int fdb__json1_finish(fsl_diff_builder * const b){ return fdb__out(b, "0]}", 3); } static fsl_diff_builder * fsl__diff_builder_json1(void){ fsl_diff_builder * rc = fsl__diff_builder_alloc(0); if(rc){ rc->chunkHeader = NULL; rc->start = fdb__json1_start; rc->skip = fdb__json1_skip; rc->common = fdb__json1_common; rc->insertion = fdb__json1_insertion; rc->deletion = fdb__json1_deletion; rc->replacement = fdb__json1_replacement; rc->edit = fdb__json1_edit; rc->finish = fdb__json1_finish; rc->finalize = fdb__generic_finalize; assert(!rc->pimpl); assert(0==rc->implFlags); assert(0==rc->lnLHS); assert(0==rc->lnRHS); assert(NULL==rc->opt); } return rc; } static int fdb__utxt_start(fsl_diff_builder * const b){ int rc = 0; if(0==(FSL_DIFF2_NOINDEX & b->opt->diffFlags)){ rc = fdb__outf(b,"Index: %s\n%.66c\n", b->opt->nameLHS/*RHS?*/, '='); } if(0==rc){ rc = fdb__outf(b, "--- %s\n+++ %s\n", b->opt->nameLHS, b->opt->nameRHS); } return rc; } static int fdb__utxt_chunkHeader(fsl_diff_builder* const b, uint32_t lnnoLHS, uint32_t linesLHS, uint32_t lnnoRHS, uint32_t linesRHS ){ if(FSL_DIFF2_LINE_NUMBERS & b->opt->diffFlags){ return fdb__outf(b, "%.40c\n", '~'); }else{ return fdb__outf(b, "@@ -%" PRIu32 ",%" PRIu32 " +%" PRIu32 ",%" PRIu32 " @@\n", lnnoLHS, linesLHS, lnnoRHS, linesRHS); } } static int fdb__utxt_skip(fsl_diff_builder * const b, uint32_t n){ //MARKER(("SKIP\n")); b->lnLHS += n; b->lnRHS += n; return 0; } /** Outputs line numbers to b->opt->out. */ static int fdb__utxt_lineno(fsl_diff_builder * const b, uint32_t lnL, uint32_t lnR){ int rc = 0; if(FSL_DIFF2_LINE_NUMBERS & b->opt->diffFlags){ rc = lnL ? fdb__outf(b, "%s%6" PRIu32 "%s ", (lnR ? "" : b->opt->ansiColor.deletion), lnL, (lnR ? "" : b->opt->ansiColor.reset)) : fdb__out(b, " ", 7); if(0==rc){ rc = lnR ? fdb__outf(b, "%s%6" PRIu32 "%s ", (lnL ? "" : b->opt->ansiColor.insertion), lnR, (lnL ? "" : b->opt->ansiColor.reset)) : fdb__out(b, " ", 7); } } return rc; } static int fdb__utxt_common(fsl_diff_builder * const b, fsl_dline const * pLine){ //MARKER(("COMMON\n")); ++b->lnLHS; ++b->lnRHS; const int rc = fdb__utxt_lineno(b, b->lnLHS, b->lnRHS); return rc ? rc : fdb__outf(b, " %.*s\n", (int)pLine->n, pLine->z); } static int fdb__utxt_insertion(fsl_diff_builder * const b, fsl_dline const * pLine){ //MARKER(("INSERT\n")); ++b->lnRHS; const int rc = fdb__utxt_lineno(b, 0, b->lnRHS); return rc ? rc : fdb__outf(b, "%s+%.*s%s\n", b->opt->ansiColor.insertion, (int)pLine->n, pLine->z, b->opt->ansiColor.reset); } static int fdb__utxt_deletion(fsl_diff_builder * const b, fsl_dline const * pLine){ //MARKER(("DELETE\n")); ++b->lnLHS; const int rc = fdb__utxt_lineno(b, b->lnLHS, 0); return rc ? rc : fdb__outf(b, "%s-%.*s%s\n", b->opt->ansiColor.deletion, (int)pLine->n, pLine->z, b->opt->ansiColor.reset); } static int fdb__utxt_replacement(fsl_diff_builder * const b, fsl_dline const * lineLhs, fsl_dline const * lineRhs) { //MARKER(("REPLACE\n")); int rc = b->deletion(b, lineLhs); if(0==rc) rc = b->insertion(b, lineRhs); return rc; } static int fdb__utxt_edit(fsl_diff_builder * const b, fsl_dline const * lineLhs, fsl_dline const * lineRhs){ //MARKER(("EDIT\n")); int rc = b->deletion(b, lineLhs); if(0==rc) rc = b->insertion(b, lineRhs); return rc; } static void fdb__utxt_finalize(fsl_diff_builder * const b){ fsl_free(b); } static fsl_diff_builder * fsl__diff_builder_utxt(void){ fsl_diff_builder * rc = fsl__diff_builder_alloc(0); if(!rc) return NULL; rc->chunkHeader = fdb__utxt_chunkHeader; rc->start = fdb__utxt_start; rc->skip = fdb__utxt_skip; rc->common = fdb__utxt_common; rc->insertion = fdb__utxt_insertion; rc->deletion = fdb__utxt_deletion; rc->replacement = fdb__utxt_replacement; rc->edit = fdb__utxt_edit; rc->finish = NULL; rc->finalize = fdb__utxt_finalize; return rc; } struct DiBuTcl { /** Buffer for TCL-format string conversion */ fsl_buffer str; }; typedef struct DiBuTcl DiBuTcl; static const DiBuTcl DiBuTcl_empty = {fsl_buffer_empty_m}; #define BR_OPEN if(FSL_DIFF2_TCL_BRACES & b->opt->diffFlags) \ rc = fdb__out(b, "{", 1) #define BR_CLOSE if(FSL_DIFF2_TCL_BRACES & b->opt->diffFlags) \ rc = fdb__out(b, "}", 1) #define DTCL_BUFFER(B) &((DiBuTcl*)(B)->pimpl)->str static int fdb__outtcl(fsl_diff_builder * const b, char const *z, unsigned int n, char chAppend ){ int rc; fsl_buffer * const o = DTCL_BUFFER(b); fsl_buffer_reuse(o); rc = fsl_buffer_append_tcl_literal(o, z, n); if(0==rc) rc = fdb__out(b, (char const *)o->mem, o->used); if(chAppend && 0==rc) rc = fdb__out(b, &chAppend, 1); return rc; } static int fdb__tcl_start(fsl_diff_builder * const b){ int rc = 0; fsl_buffer_reuse(DTCL_BUFFER(b)); BR_OPEN; if(0==rc) rc = fdb__out(b, "\n", 1); if(0==rc && b->opt->nameLHS){ char const * zRHS = b->opt->nameRHS ? b->opt->nameRHS : b->opt->nameLHS; BR_OPEN; if(0==rc) rc = fdb__out(b, "FILE ", 5); if(0==rc) rc = fdb__outtcl(b, b->opt->nameLHS, (unsigned)fsl_strlen(b->opt->nameLHS), ' '); if(0==rc) rc = fdb__outtcl(b, zRHS, (unsigned)fsl_strlen(zRHS), 0); if(0==rc) {BR_CLOSE;} if(0==rc) rc = fdb__out(b, "\n", 1); } return rc; } static int fdb__tcl_skip(fsl_diff_builder * const b, uint32_t n){ int rc = 0; BR_OPEN; if(0==rc) rc = fdb__outf(b, "SKIP %" PRIu32, n); if(0==rc) {BR_CLOSE;} if(0==rc) rc = fdb__outf(b, "\n", 1); return rc; } static int fdb__tcl_common(fsl_diff_builder * const b, fsl_dline const * pLine){ int rc = 0; BR_OPEN; if(0==rc) rc = fdb__out(b, "COM ", 5); if(0==rc) rc= fdb__outtcl(b, pLine->z, pLine->n, 0); if(0==rc) {BR_CLOSE;} if(0==rc) rc = fdb__outf(b, "\n", 1); return rc; } static int fdb__tcl_insertion(fsl_diff_builder * const b, fsl_dline const * pLine){ int rc = 0; BR_OPEN; if(0==rc) rc = fdb__out(b, "INS ", 5); if(0==rc) rc = fdb__outtcl(b, pLine->z, pLine->n, 0); if(0==rc) {BR_CLOSE;} if(0==rc) rc = fdb__outf(b, "\n", 1); return rc; } static int fdb__tcl_deletion(fsl_diff_builder * const b, fsl_dline const * pLine){ int rc = 0; BR_OPEN; if(0==rc) rc = fdb__out(b, "DEL ", 5); if(0==rc) rc = fdb__outtcl(b, pLine->z, pLine->n, 0); if(0==rc) {BR_CLOSE;} if(0==rc) rc = fdb__outf(b, "\n", 1); return rc; } static int fdb__tcl_replacement(fsl_diff_builder * const b, fsl_dline const * lineLhs, fsl_dline const * lineRhs) { int rc = 0; BR_OPEN; if(0==rc) rc = fdb__out(b, "EDIT \"\" ", 8); if(0==rc) rc = fdb__outtcl(b, lineLhs->z, lineLhs->n, ' '); if(0==rc) rc = fdb__outtcl(b, lineRhs->z, lineRhs->n, 0); if(0==rc) {BR_CLOSE;} if(0==rc) rc = fdb__outf(b, "\n", 1); return rc; } static int fdb__tcl_edit(fsl_diff_builder * const b, fsl_dline const * pX, fsl_dline const * pY){ int rc = 0; int i, x; fsl_dline_change chng = fsl_dline_change_empty; #define RC if(rc) goto end BR_OPEN; rc = fdb__out(b, "EDIT", 4); RC; fsl_dline_change_spans(pX, pY, &chng); for(i=x=0; iz + x, chng.a[i].iStart1 - x, ' '); RC; x = chng.a[i].iStart1; rc = fdb__outtcl(b, pX->z + x, chng.a[i].iLen1, ' '); RC; x += chng.a[i].iLen1; rc = fdb__outtcl(b, pY->z + chng.a[i].iStart2, chng.a[i].iLen2, 0); RC; } assert(0==rc); if( x < pX->n ){ rc = fdb__out(b, " ", 1); RC; rc = fdb__outtcl(b, pX->z + x, pX->n - x, 0); RC; } BR_CLOSE; RC; rc = fdb__out(b, "\n", 1); end: #undef RC return rc; } static int fdb__tcl_finish(fsl_diff_builder * const b){ int rc = 0; BR_CLOSE; if(0==rc && FSL_DIFF2_TCL_BRACES & b->opt->diffFlags){ rc = fdb__out(b, "\n", 1); } return rc; } #undef BR_OPEN #undef BR_CLOSE static void fdb__tcl_finalize(fsl_diff_builder * const b){ fsl_buffer_clear( &((DiBuTcl*)b->pimpl)->str ); *b = fsl_diff_builder_empty; fsl_free(b); } static fsl_diff_builder * fsl__diff_builder_tcl(void){ fsl_diff_builder * rc = fsl__diff_builder_alloc((fsl_size_t)sizeof(DiBuTcl)); if(rc){ rc->chunkHeader = NULL; rc->start = fdb__tcl_start; rc->skip = fdb__tcl_skip; rc->common = fdb__tcl_common; rc->insertion = fdb__tcl_insertion; rc->deletion = fdb__tcl_deletion; rc->replacement = fdb__tcl_replacement; rc->edit = fdb__tcl_edit; rc->finish = fdb__tcl_finish; rc->finalize = fdb__tcl_finalize; assert(0!=rc->pimpl); DiBuTcl * const dbt = (DiBuTcl*)rc->pimpl; *dbt = DiBuTcl_empty; if(fsl_buffer_reserve(&dbt->str, 120)){ rc->finalize(rc); rc = 0; } } return rc; } /** Column indexes for SplitText::cols. */ enum SplitTextCols { STC_NUM1 = 0, STC_TEXT1, STC_MOD, STC_NUM2, STC_TEXT2, STC_count }; /** Internal state for the text-mode split diff builder. This builder buffers its contents in 5 buffers: 2 each for the LHS/RHS line numbers and content and one for the "change marker" (a center column). Each of the STC_NUM1, STC_NUM2, STC_TEXT1, and STC_TEXT2 columns is stored as one NUL-delimited string in each of the corresponding column buffers. STC_MOD is a simple byte array, with each byte corresponding two one line of the diff and marking what type of linle it is (common, insertion, deletion, or edit/replacement). The STC_SKIP column is managed differently. It is zero-filled, with a non-0 value at each line of the diff which represents a skipped gap. Potential TODO: combine this and STC_MOD. */ struct SplitTxt { /** Largest column width we've yet seen. These are only updated for STC_TEXT1 and STC_TEXT2. The others currently have fixed widths. FIXME: these are in bytes, not text columns. The current code may truncate multibyte characters. */ uint32_t maxWidths[STC_count]; }; typedef struct SplitTxt SplitTxt; static const SplitTxt SplitTxt_empty = {{1,10,3,1,10}}; #define SPLITSTATE(VNAME) SplitTxt * const VNAME = (SplitTxt *)b->pimpl static int maxColWidth(fsl_diff_builder const * const b, SplitTxt const * const sst, int mwIndex){ static const short minColWidth = 10/*b->opt.columnWidth values smaller than this are treated as this value*/; switch(mwIndex){ case STC_NUM1: case STC_NUM2: case STC_MOD: return sst->maxWidths[mwIndex]; case STC_TEXT1: case STC_TEXT2: break; default: assert(!"internal API misuse: invalid column index."); break; } int const y = (b->opt->columnWidth>0 && b->opt->columnWidth<=sst->maxWidths[mwIndex]) ? (int)b->opt->columnWidth : (int)sst->maxWidths[mwIndex]; return minColWidth > y ? minColWidth : y; } static int fdb__splittxt_mod(fsl_diff_builder * const b, char ch){ assert(2==b->passNumber); return fdb__outf(b, " %c ", ch); } static int fdb__splittxt_lineno(fsl_diff_builder * const b, SplitTxt const * const sst, bool isLeft, uint32_t n){ assert(2==b->passNumber); int const col = isLeft ? STC_NUM1 : STC_NUM2; return n ? fdb__outf(b, "%*" PRIu32 " ", sst->maxWidths[col], n) : fdb__outf(b, "%.*c ", sst->maxWidths[col], ' '); } static int fdb__splittxt_start(fsl_diff_builder * const b){ int rc = 0; if(1==b->passNumber){ SPLITSTATE(sst); *sst = SplitTxt_empty; ++b->fileCount; return rc; } if(b->fileCount>1){ rc = fdb__out(b, "\n", 1); } if(0==rc){ fsl_diff_opt const * const o = b->opt; if(o->nameLHS || o->nameRHS || o->hashLHS || o->hashRHS){ rc = fdb__outf(b, "--- %s%s%s\n+++ %s%s%s\n", o->nameLHS ? o->nameLHS : "", (o->nameLHS && o->hashLHS) ? " " : "", o->hashLHS ? o->hashLHS : "", o->nameRHS ? o->nameRHS : "", (o->nameRHS && o->hashRHS) ? " " : "", o->hashRHS ? o->hashRHS : ""); } } return rc; } static int fdb__splittxt_skip(fsl_diff_builder * const b, uint32_t n){ b->lnLHS += n; b->lnRHS += n; if(1==b->passNumber) return 0; SPLITSTATE(sst); int const maxWidth1 = maxColWidth(b, sst, STC_TEXT1); int const maxWidth2 = maxColWidth(b, sst, STC_TEXT2); return fdb__outf(b, "%.*c %.*c %.*c %.*c\n", sst->maxWidths[STC_NUM1], '~', maxWidth1, '~', sst->maxWidths[STC_NUM2], '~', maxWidth2, '~'); } static int fdb__splittxt_color(fsl_diff_builder * const b, int modType){ char const *z = 0; switch(modType){ case (int)'i': z = b->opt->ansiColor.insertion; break; case (int)'d': z = b->opt->ansiColor.deletion; break; case (int)'r'/*replacement*/: case (int)'e': z = b->opt->ansiColor.edit; break; case 0: z = b->opt->ansiColor.reset; break; default: assert(!"invalid color op!"); } return z&&*z ? fdb__outf(b, "%s", z) : 0; } static int fdb__splittxt_side(fsl_diff_builder * const b, SplitTxt * const sst, bool isLeft, fsl_dline const * const pLine){ int rc = fdb__splittxt_lineno(b, sst, isLeft, pLine ? (isLeft ? b->lnLHS : b->lnRHS) : 0U); if(0==rc){ uint32_t const w = maxColWidth(b, sst, isLeft ? STC_TEXT1 : STC_TEXT2); if(pLine){ fsl_size_t const nU = /* Measure column width in UTF8 characters, not bytes! */ fsl_strlen_utf8(pLine->z, (fsl_int_t)pLine->n); rc = fdb__outf(b, "%#.*s", (int)(w < nU ? w : nU), pLine->z); if(0==rc && w>nU){ rc = fdb__outf(b, "%.*c", (int)(w - nU), ' '); } }else{ rc = fdb__outf(b, "%.*c", (int)w, ' '); } if(0==rc && !isLeft) rc = fdb__out(b, "\n", 1); } return rc; } static void fdb__splittext_update_maxlen(SplitTxt * const sst, int col, char const * const z, uint32_t n){ if(sst->maxWidths[col]maxWidths[col] = n; #else n = (uint32_t)fsl_strlen_utf8(z, (fsl_int_t)n); if(sst->maxWidths[col]maxWidths[col] = n; #endif } } static int fdb__splittxt_common(fsl_diff_builder * const b, fsl_dline const * const pLine){ int rc = 0; SPLITSTATE(sst); ++b->lnLHS; ++b->lnRHS; if(1==b->passNumber){ fdb__splittext_update_maxlen(sst, STC_TEXT1, pLine->z, pLine->n); fdb__splittext_update_maxlen(sst, STC_TEXT2, pLine->z, pLine->n); return 0; } rc = fdb__splittxt_side(b, sst, true, pLine); if(0==rc) rc = fdb__splittxt_mod(b, ' '); if(0==rc) rc = fdb__splittxt_side(b, sst, false, pLine); return rc; } static int fdb__splittxt_insertion(fsl_diff_builder * const b, fsl_dline const * const pLine){ int rc = 0; SPLITSTATE(sst); ++b->lnRHS; if(1==b->passNumber){ fdb__splittext_update_maxlen(sst, STC_TEXT1, pLine->z, pLine->n); return rc; } rc = fdb__splittxt_color(b, 'i'); if(0==rc) rc = fdb__splittxt_side(b, sst, true, NULL); if(0==rc) rc = fdb__splittxt_mod(b, '>'); if(0==rc) rc = fdb__splittxt_side(b, sst, false, pLine); if(0==rc) rc = fdb__splittxt_color(b, 0); return rc; } static int fdb__splittxt_deletion(fsl_diff_builder * const b, fsl_dline const * const pLine){ int rc = 0; SPLITSTATE(sst); ++b->lnLHS; if(1==b->passNumber){ fdb__splittext_update_maxlen(sst, STC_TEXT2, pLine->z, pLine->n); return rc; } rc = fdb__splittxt_color(b, 'd'); if(0==rc) rc = fdb__splittxt_side(b, sst, true, pLine); if(0==rc) rc = fdb__splittxt_mod(b, '<'); if(0==rc) rc = fdb__splittxt_side(b, sst, false, NULL); if(0==rc) rc = fdb__splittxt_color(b, 0); return rc; } static int fdb__splittxt_replacement(fsl_diff_builder * const b, fsl_dline const * const lineLhs, fsl_dline const * const lineRhs) { #if 0 int rc = b->deletion(b, lineLhs); if(0==rc) rc = b->insertion(b, lineRhs); return rc; #else int rc = 0; SPLITSTATE(sst); ++b->lnLHS; ++b->lnRHS; if(1==b->passNumber){ fdb__splittext_update_maxlen(sst, STC_TEXT1, lineLhs->z, lineLhs->n); fdb__splittext_update_maxlen(sst, STC_TEXT2, lineRhs->z, lineRhs->n); return 0; } rc = fdb__splittxt_color(b, 'e'); if(0==rc) rc = fdb__splittxt_side(b, sst, true, lineLhs); if(0==rc) rc = fdb__splittxt_mod(b, '|'); if(0==rc) rc = fdb__splittxt_side(b, sst, false, lineRhs); if(0==rc) rc = fdb__splittxt_color(b, 0); return rc; #endif } static int fdb__splittxt_finish(fsl_diff_builder * const b){ int rc = 0; if(1==b->passNumber){ SPLITSTATE(sst); uint32_t ln = b->lnLHS; /* Calculate width of line number columns. */ sst->maxWidths[STC_NUM1] = sst->maxWidths[STC_NUM2] = 1; for(; ln>=10; ln/=10) ++sst->maxWidths[STC_NUM1]; ln = b->lnRHS; for(; ln>=10; ln/=10) ++sst->maxWidths[STC_NUM2]; } return rc; } static void fdb__splittxt_finalize(fsl_diff_builder * const b){ *b = fsl_diff_builder_empty; fsl_free(b); } static fsl_diff_builder * fsl__diff_builder_splittxt(void){ fsl_diff_builder * rc = fsl__diff_builder_alloc((fsl_size_t)sizeof(SplitTxt)); if(rc){ rc->twoPass = true; rc->chunkHeader = NULL; rc->start = fdb__splittxt_start; rc->skip = fdb__splittxt_skip; rc->common = fdb__splittxt_common; rc->insertion = fdb__splittxt_insertion; rc->deletion = fdb__splittxt_deletion; rc->replacement = fdb__splittxt_replacement; rc->edit = fdb__splittxt_replacement; rc->finish = fdb__splittxt_finish; rc->finalize = fdb__splittxt_finalize; assert(0!=rc->pimpl); SplitTxt * const sst = (SplitTxt*)rc->pimpl; *sst = SplitTxt_empty; } return rc; } int fsl_diff_builder_factory( fsl_diff_builder_e type, fsl_diff_builder **pOut ){ int rc = FSL_RC_TYPE; fsl_diff_builder * (*factory)(void) = NULL; switch(type){ case FSL_DIFF_BUILDER_DEBUG: factory = fsl__diff_builder_debug; break; case FSL_DIFF_BUILDER_JSON1: factory = fsl__diff_builder_json1; break; case FSL_DIFF_BUILDER_UNIFIED_TEXT: factory = fsl__diff_builder_utxt; break; case FSL_DIFF_BUILDER_TCL: factory = fsl__diff_builder_tcl; break; case FSL_DIFF_BUILDER_SPLIT_TEXT: factory = fsl__diff_builder_splittxt; break; } if(NULL!=factory){ *pOut = factory(); rc = *pOut ? 0 : FSL_RC_OOM; } return rc; } #undef DIFF_ALIGN_MX #undef DIFF_CANNOT_COMPUTE_BINARY #undef DIFF_CANNOT_COMPUTE_SYMLINK #undef DIFF_TOO_MANY_CHANGES #undef DIFF_WHITESPACE_ONLY #undef fsl_dline_empty_m #undef MARKER #undef DTCL_BUFFER #undef blob_to_utf8_no_bom #undef SPLITSTATE