/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=2 et sw=2 tw=80: */ /* Copyright 2013-2021 The Libfossil Authors, see LICENSES/BSD-2-Clause.txt SPDX-License-Identifier: BSD-2-Clause-FreeBSD SPDX-FileCopyrightText: 2021 The Libfossil Authors SPDX-ArtifactOfProjectName: Libfossil SPDX-FileType: Code Heavily indebted to the Fossil SCM project (https://fossil-scm.org). */ /** This file houses the "2nd generation" diff-generation routines APIs. This code is a straight port of those algorithms from the Fossil SCM project. */ #include "fossil-scm/fossil.h" #include #include #include #include /* for memmove()/strlen() */ const fsl_diff_cfg fsl_diff_cfg_empty = fsl_diff_cfg_empty_m; const fsl_diff_builder fsl_diff_builder_empty = fsl_diff_builder_empty_m; const fsl_dline fsl_dline_empty = fsl_dline_empty_m; const fsl_dline_change fsl_dline_change_empty = fsl_dline_change_empty_m; #define DLn fsl_dline /* Porting crutch for to-be-static functions */ #define TO_BE_STATIC /* ** Maximum length of a line in a text file, in bytes. (2**15 = 32768 bytes) */ #define LENGTH_MASK_SZ 15 #define LENGTH_MASK ((1<0 || z[0]=='\0' ); if(nLine>0){ a = fsl_malloc( sizeof(a[0])*nLine ); if(!a) return FSL_RC_OOM; memset(a, 0, sizeof(a[0])*nLine); }else{ *pnLine = 0; *pOut = a; return 0; } assert( a ); i = 0; do{ zNL = strchr(z,'\n'); if( zNL==0 ) zNL = z+n; nn = (uint32_t)(zNL - z); if( nn>LENGTH_MASK ){ fsl_free(a); *pOut = 0; *pnLine = 0; return FSL_RC_TYPE; } a[i].z = z; k = nn; if( diffFlags & FSL_DIFF2_STRIP_EOLCR ){ if( k>0 && z[k-1]=='\r' ){ k--; } } a[i].n = k; s = 0; if( diffFlags & FSL_DIFF2_IGNORE_EOLWS ){ while( k>0 && fsl_isspace(z[k-1]) ){ k--; } } if( (diffFlags & FSL_DIFF2_IGNORE_ALLWS) ==FSL_DIFF2_IGNORE_ALLWS ){ uint32_t numws = 0; while( sh!=pB->h ) return 1; return memcmp(pA->z,pB->z, pA->h&LENGTH_MASK); } /* ** Return zero if two DLine elements are identical, ignoring ** all whitespace. The indent field of pA/pB already points ** to the first non-space character in the string. */ TO_BE_STATIC int compare_dline_ignore_allws(const DLn *pA, const DLn *pB){ unsigned short a = pA->indent, b = pB->indent; if( pA->h==pB->h ){ while( an || bn ){ if( an && bn && pA->z[a++] != pB->z[b++] ) return 1; while( an && fsl_isspace(pA->z[a])) ++a; while( bn && fsl_isspace(pB->z[b])) ++b; } return pA->n-a != pB->n-b; } return 1; } /* ** The two text segments zLeft and zRight are known to be different on ** both ends, but they might have a common segment in the middle. If ** they do not have a common segment, return 0. If they do have a large ** common segment, return non-0 and before doing so set: ** ** aLCS[0] = start of the common segment in zLeft ** aLCS[1] = end of the common segment in zLeft ** aLCS[2] = start of the common segment in zLeft ** aLCS[3] = end of the common segment in zLeft ** ** This computation is for display purposes only and does not have to be ** optimal or exact. */ TO_BE_STATIC int textLCS2( const char *zLeft, uint32_t nA, /* String on the left */ const char *zRight, uint32_t nB, /* String on the right */ uint32_t *aLCS /* Identify bounds of LCS here */ ){ const unsigned char *zA = (const unsigned char*)zLeft; /* left string */ const unsigned char *zB = (const unsigned char*)zRight; /* right string */ uint32_t i, j, k; /* Loop counters */ uint32_t lenBest = 0; /* Match length to beat */ for(i=0; ilenBest ){ lenBest = k; aLCS[0] = i; aLCS[1] = i+k; aLCS[2] = j; aLCS[3] = j+k; } } } } return lenBest>0; } /* ** Find the smallest spans that are different between two text strings ** that are known to be different on both ends. Returns the number ** of entries in p->a which get populated. */ TO_BE_STATIC unsigned short textLineChanges( const char *zLeft, uint32_t nA, /* String on the left */ const char *zRight, uint32_t nB, /* String on the right */ fsl_dline_change * const p /* Write results here */ ){ p->n = 1; p->a[0].iStart1 = 0; p->a[0].iLen1 = nA; p->a[0].iStart2 = 0; p->a[0].iLen2 = nB; p->a[0].isMin = 0; while( p->nn; i++){ if( p->a[i].isMin ) continue; x = p->a[i].iLen1; if( p->a[i].iLen2a[i].iLen2; if( x>mxLen ){ mxLen = x; mxi = i; } } if( mxLen<6 ) break; x = textLCS2(zLeft + p->a[mxi].iStart1, p->a[mxi].iLen1, zRight + p->a[mxi].iStart2, p->a[mxi].iLen2, aLCS); if( x==0 ){ p->a[mxi].isMin = 1; continue; } a = p->a+mxi; b = a+1; if( mxin-1 ){ memmove(b+1, b, sizeof(*b)*(p->n-mxi-1)); } p->n++; b->iStart1 = a->iStart1 + aLCS[1]; b->iLen1 = a->iLen1 - aLCS[1]; a->iLen1 = aLCS[0]; b->iStart2 = a->iStart2 + aLCS[3]; b->iLen2 = a->iLen2 - aLCS[3]; a->iLen2 = aLCS[2]; b->isMin = 0; } return p->n; } /* ** Return true if the string starts with n spaces */ TO_BE_STATIC int allSpaces(const char *z, int n){ int i; for(i=0; in<1 ) return; /* (1) Attempt to move indentation changes to the left margin */ if( p->a[0].iLen1==0 && (len = p->a[0].iLen2)>0 && (j = p->a[0].iStart2)>0 && zB[0]==zB[j] && allSpaces(zB, j) ){ for(n=1; na[1], &p->a[0], sizeof(p->a[0])*p->n); p->n++; p->a[0] = p->a[1]; p->a[1].iStart2 += n; p->a[1].iLen2 -= n; p->a[0].iLen2 = n; } p->a[0].iStart1 = 0; p->a[0].iStart2 = 0; }else if( p->a[0].iLen2==0 && (len = p->a[0].iLen1)>0 && (j = p->a[0].iStart1)>0 && zA[0]==zA[j] && allSpaces(zA, j) ){ for(n=1; na[1], &p->a[0], sizeof(p->a[0])*p->n); p->n++; p->a[0] = p->a[1]; p->a[1].iStart1 += n; p->a[1].iLen1 -= n; p->a[0].iLen1 = n; } p->a[0].iStart1 = 0; p->a[0].iStart2 = 0; } /* (2) Try to shift changes so that they begin or end with a ** space. (TBD) */ } /* ** Given two lines of text, pFrom and pTo, compute a set of changes ** between those two lines, for enhanced display purposes. ** ** The result is written into the fsl_dline_change object given by the ** third parameter. */ void fsl_dline_change_spans( const DLn *pLeft, /* Left line of the change */ const DLn *pRight, /* Right line of the change */ fsl_dline_change * const p /* OUTPUT: Write the results here */ ){ int nLeft; /* Length of left line in bytes */ int nRight; /* Length of right line in bytes */ int nShort; /* Shortest of left and right */ int nPrefix; /* Length of common prefix */ int nSuffix; /* Length of common suffix */ int nCommon; /* Total byte length of suffix and prefix */ const char *zLeft; /* Text of the left line */ const char *zRight; /* Text of the right line */ int nLeftDiff; /* nLeft - nPrefix - nSuffix */ int nRightDiff; /* nRight - nPrefix - nSuffix */ nLeft = pLeft->n; zLeft = pLeft->z; nRight = pRight->n; zRight = pRight->z; nShort = nLeft0 && (zLeft[nPrefix]&0xc0)==0x80 ) nPrefix--; } nSuffix = 0; if( nPrefix0 && (zLeft[nLeft-nSuffix]&0xc0)==0x80 ) nSuffix--; } if( nSuffix==nLeft || nSuffix==nRight ) nPrefix = 0; } nCommon = nPrefix + nSuffix; /* If the prefix and suffix overlap, that means that we are dealing with ** a pure insertion or deletion of text that can have multiple alignments. ** Try to find an alignment to begins and ends on whitespace, or on ** punctuation, rather than in the middle of a name or number. */ if( nCommon > nShort ){ int iBest = -1; int iBestVal = -1; int i; int nLong = nLeftiBestVal ){ iBestVal = iVal; iBest = i; } } nPrefix = iBest; nSuffix = nShort - nPrefix; nCommon = nPrefix + nSuffix; } /* A single chunk of text inserted */ if( nCommon==nLeft ){ p->n = 1; p->a[0].iStart1 = nPrefix; p->a[0].iLen1 = 0; p->a[0].iStart2 = nPrefix; p->a[0].iLen2 = nRight - nCommon; improveReadability(zLeft, zRight, p); return; } /* A single chunk of text deleted */ if( nCommon==nRight ){ p->n = 1; p->a[0].iStart1 = nPrefix; p->a[0].iLen1 = nLeft - nCommon; p->a[0].iStart2 = nPrefix; p->a[0].iLen2 = 0; improveReadability(zLeft, zRight, p); return; } /* At this point we know that there is a chunk of text that has ** changed between the left and the right. Check to see if there ** is a large unchanged section in the middle of that changed block. */ nLeftDiff = nLeft - nCommon; nRightDiff = nRight - nCommon; if( nLeftDiff >= 4 && nRightDiff >= 4 && textLineChanges(&zLeft[nPrefix], nLeftDiff, &zRight[nPrefix], nRightDiff, p)>1 ){ int i; for(i=0; in; i++){ p->a[i].iStart1 += nPrefix; p->a[i].iStart2 += nPrefix; } improveReadability(zLeft, zRight, p); return; } /* If all else fails, show a single big change between left and right */ p->n = 1; p->a[0].iStart1 = nPrefix; p->a[0].iLen1 = nLeft - nCommon; p->a[0].iStart2 = nPrefix; p->a[0].iLen2 = nRight - nCommon; improveReadability(zLeft, zRight, p); } #undef MX_CSN #undef DCx #undef DLn #undef DIFF_CANNOT_COMPUTE_BINARY #undef DIFF_CANNOT_COMPUTE_SYMLINK #undef DIFF_TOO_MANY_CHANGES #undef DIFF_WHITESPACE_ONLY #undef LENGTH_MASK_SZ #undef LENGTH_MASK #undef fsl_dline_empty_m #undef TO_BE_STATIC