Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Diff algorithm is slightly faster and does a better job of dealing with indentation changes in code. See forum thread 7631656a2823338a. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
1cb182ac18de0bb78c060c9641ba87b0 |
User & Date: | drh 2022-01-23 20:11:50 |
Context
2022-01-24
| ||
06:54 | Replaced the "manual" TLS EOF tracking with BIO_eof(), analog to how is done in althttpd. ... (check-in: 06e300e5 user: stephan tags: trunk) | |
2022-01-23
| ||
20:11 | Diff algorithm is slightly faster and does a better job of dealing with indentation changes in code. See forum thread 7631656a2823338a. ... (check-in: 1cb182ac user: drh tags: trunk) | |
19:57 | Add a heuristic to the diff generator that helps it do a better job of identifying differences in C code that result from a change in indentation level. ... (Closed-Leaf check-in: 8cd73dda user: drh tags: diff-improvement) | |
12:52 | Fix bullets in wsl_caveats.wiki ... (check-in: ea6b2d3e user: larrybr tags: trunk) | |
Changes
Changes to src/diff.c.
︙ | ︙ | |||
119 120 121 122 123 124 125 | ** of the line. If any line is longer than LENGTH_MASK characters, ** the file is considered binary. */ typedef struct DLine DLine; struct DLine { const char *z; /* The text of the line */ u64 h; /* Hash of the line */ | | > | 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | ** of the line. If any line is longer than LENGTH_MASK characters, ** the file is considered binary. */ typedef struct DLine DLine; struct DLine { const char *z; /* The text of the line */ u64 h; /* Hash of the line */ unsigned short indent; /* Index of first non-space */ unsigned short n; /* number of bytes */ unsigned short nw; /* number of bytes without leading/trailing space */ unsigned int iNext; /* 1+(Index of next line with same the same hash) */ /* an array of DLine elements serves two purposes. The fields ** above are one per line of input text. But each entry is also ** a bucket in a hash table, as follows: */ unsigned int iHash; /* 1+(first entry in the hash chain) */ }; |
︙ | ︙ | |||
161 162 163 164 165 166 167 | int *aEdit; /* Array of copy/delete/insert triples */ int nEdit; /* Number of integers (3x num of triples) in aEdit[] */ int nEditAlloc; /* Space allocated for aEdit[] */ DLine *aFrom; /* File on left side of the diff */ int nFrom; /* Number of lines in aFrom[] */ DLine *aTo; /* File on right side of the diff */ int nTo; /* Number of lines in aTo[] */ | | > > > > > > > > > > > > > > > > > > > > > > | 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 | int *aEdit; /* Array of copy/delete/insert triples */ int nEdit; /* Number of integers (3x num of triples) in aEdit[] */ int nEditAlloc; /* Space allocated for aEdit[] */ DLine *aFrom; /* File on left side of the diff */ int nFrom; /* Number of lines in aFrom[] */ DLine *aTo; /* File on right side of the diff */ int nTo; /* Number of lines in aTo[] */ int (*xDiffer)(const DLine *,const DLine *); /* comparison function */ }; /* Fast isspace for use by diff */ static const char diffIsSpace[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; #define diff_isspace(X) (diffIsSpace[(unsigned char)(X)]) /* ** Count the number of lines in the input string. Include the last line ** in the count even if it lacks the \n terminator. If an empty string ** is specified, the number of lines is zero. For the purposes of this ** function, a string is considered empty if it contains no characters ** -OR- it contains only NUL characters. |
︙ | ︙ | |||
243 244 245 246 247 248 249 | } a[i].z = z; k = nn; if( diffFlags & DIFF_STRIP_EOLCR ){ if( k>0 && z[k-1]=='\r' ){ k--; } } a[i].n = k; | < | | > > | | < | 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 | } a[i].z = z; k = nn; if( diffFlags & DIFF_STRIP_EOLCR ){ if( k>0 && z[k-1]=='\r' ){ k--; } } a[i].n = k; if( diffFlags & DIFF_IGNORE_EOLWS ){ while( k>0 && diff_isspace(z[k-1]) ){ k--; } } if( (diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){ int numws = 0; for(s=0; s<k && z[s]<=' '; s++){} a[i].indent = s; a[i].nw = k - s; for(h=0, x=s; x<k; x++){ char c = z[x]; if( diff_isspace(c) ){ ++numws; }else{ h = (h^c)*9000000000000000041LL; } } k -= numws; }else{ int k2 = k & ~0x7; u64 m; for(h=x=s=0; x<k2; x += 8){ memcpy(&m, z+x, 8); h = (h^m)*9000000000000000041LL; } m = 0; memcpy(&m, z+x, k-k2); h ^= m; } a[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) | (k-s); h2 = h % nLine; a[i].iNext = a[h2].iHash; a[h2].iHash = i+1; z += nn+1; n -= nn+1; i++; }while( zNL[0]!='\0' && zNL[1]!='\0' ); |
︙ | ︙ | |||
298 299 300 301 302 303 304 | } /* ** Return zero if two DLine elements are identical, ignoring ** all whitespace. The indent field of pA/pB already points ** to the first non-space character in the string. */ | < < > > > > | | | 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 | } /* ** Return zero if two DLine elements are identical, ignoring ** all whitespace. The indent field of pA/pB already points ** to the first non-space character in the string. */ static int compare_dline_ignore_allws(const DLine *pA, const DLine *pB){ if( pA->h==pB->h ){ int a, b; if( memcmp(pA->z, pB->z, pA->h&LENGTH_MASK)==0 ) return 0; a = pA->indent; b = pB->indent; while( a<pA->n || b<pB->n ){ if( a<pA->n && b<pB->n && pA->z[a++] != pB->z[b++] ) return 1; while( a<pA->n && diff_isspace(pA->z[a])) ++a; while( b<pB->n && diff_isspace(pB->z[b])) ++b; } return pA->n-a != pB->n-b; } return 1; } /* |
︙ | ︙ | |||
336 337 338 339 340 341 342 | /* ** Append a single line of context-diff output to pOut. */ static void appendDiffLine( Blob *pOut, /* Where to write the line of output */ char cPrefix, /* One of " ", "+", or "-" */ | | | 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 | /* ** Append a single line of context-diff output to pOut. */ static void appendDiffLine( Blob *pOut, /* Where to write the line of output */ char cPrefix, /* One of " ", "+", or "-" */ const DLine *pLine /* The line to be output */ ){ blob_append_char(pOut, cPrefix); blob_append(pOut, pLine->z, pLine->n); blob_append_char(pOut, '\n'); } /* |
︙ | ︙ | |||
369 370 371 372 373 374 375 | ** Output a patch-style text diff. */ static void contextDiff( DContext *p, /* The difference */ Blob *pOut, /* Output a context diff to here */ DiffConfig *pCfg /* Configuration options */ ){ | | | | | | 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 | ** Output a patch-style text diff. */ static void contextDiff( DContext *p, /* The difference */ Blob *pOut, /* Output a context diff to here */ DiffConfig *pCfg /* Configuration options */ ){ const DLine *A; /* Left side of the diff */ const DLine *B; /* Right side of the diff */ int a = 0; /* Index of next line in A[] */ int b = 0; /* Index of next line in B[] */ int *R; /* Array of COPY/DELETE/INSERT triples */ int r; /* Index into R[] */ int nr; /* Number of COPY/DELETE/INSERT triples to process */ int mxr; /* Maximum value for r */ int na, nb; /* Number of lines shown from A and B */ int i, j; /* Loop counters */ int m; /* Number of lines to output */ |
︙ | ︙ | |||
617 618 619 620 621 622 623 | } /* ** Return true if the string starts with n spaces */ static int allSpaces(const char *z, int n){ int i; | | | 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 | } /* ** Return true if the string starts with n spaces */ static int allSpaces(const char *z, int n){ int i; for(i=0; i<n && diff_isspace(z[i]); i++){} return i==n; } /* ** Try to improve the human-readability of the LineChange p. ** ** (1) If the first change span shows a change of indentation, try to |
︙ | ︙ | |||
743 744 745 746 747 748 749 | int iBestVal = -1; int i; int nLong = nLeft<nRight ? nRight : nLeft; int nGap = nLong - nShort; for(i=nShort-nSuffix; i<=nPrefix; i++){ int iVal = 0; char c = zLeft[i]; | | | | 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 | int iBestVal = -1; int i; int nLong = nLeft<nRight ? nRight : nLeft; int nGap = nLong - nShort; for(i=nShort-nSuffix; i<=nPrefix; i++){ int iVal = 0; char c = zLeft[i]; if( diff_isspace(c) ){ iVal += 5; }else if( !fossil_isalnum(c) ){ iVal += 2; } c = zLeft[i+nGap-1]; if( diff_isspace(c) ){ iVal += 5; }else if( !fossil_isalnum(c) ){ iVal += 2; } if( iVal>iBestVal ){ iBestVal = iVal; iBest = i; |
︙ | ︙ | |||
887 888 889 890 891 892 893 | */ typedef struct DiffBuilder DiffBuilder; struct DiffBuilder { void (*xSkip)(DiffBuilder*, unsigned int, int); void (*xCommon)(DiffBuilder*,const DLine*); void (*xInsert)(DiffBuilder*,const DLine*); void (*xDelete)(DiffBuilder*,const DLine*); | | | 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 | */ typedef struct DiffBuilder DiffBuilder; struct DiffBuilder { void (*xSkip)(DiffBuilder*, unsigned int, int); void (*xCommon)(DiffBuilder*,const DLine*); void (*xInsert)(DiffBuilder*,const DLine*); void (*xDelete)(DiffBuilder*,const DLine*); void (*xReplace)(DiffBuilder*,const DLine*,const DLine*); void (*xEdit)(DiffBuilder*,const DLine*,const DLine*); void (*xEnd)(DiffBuilder*); unsigned int lnLeft; /* Lines seen on the left (delete) side */ unsigned int lnRight; /* Lines seen on the right (insert) side */ unsigned int nPending; /* Number of pending lines */ int eState; /* State of the output */ int width; /* Display width */ |
︙ | ︙ | |||
1731 1732 1733 1734 1735 1736 1737 | ** (1) Remove leading and trailing whitespace. ** (2) Truncate both strings to at most 250 characters ** (3) If the two strings have a common prefix, measure that prefix ** (4) Find the length of the longest common subsequence that is ** at least 150% longer than the common prefix. ** (5) Longer common subsequences yield lower scores. */ | | > > > > > > > > > > > | > > > > | | < < < | 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 | ** (1) Remove leading and trailing whitespace. ** (2) Truncate both strings to at most 250 characters ** (3) If the two strings have a common prefix, measure that prefix ** (4) Find the length of the longest common subsequence that is ** at least 150% longer than the common prefix. ** (5) Longer common subsequences yield lower scores. */ static int match_dline(DLine *pA, DLine *pB){ const char *zA; /* Left string */ const char *zB; /* right string */ int nA; /* Bytes in zA[] */ int nB; /* Bytes in zB[] */ int nMin; int nPrefix; int avg; /* Average length of A and B */ int i, j, k; /* Loop counters */ int best = 0; /* Longest match found so far */ int score; /* Final score. 0..100 */ unsigned char c; /* Character being examined */ unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */ unsigned char aNext[252]; /* aNext[i] = index in zB[] of next zB[i] char */ zA = pA->z; if( pA->nw==0 && pA->n ){ for(i=0; i<pA->n && diff_isspace(zA[i]); i++){} pA->indent = i; for(j=pA->n-1; j>i && diff_isspace(zA[j]); j--){} pA->nw = j - i + 1; } zA += pA->indent; nA = pA->nw; zB = pB->z; if( pB->nw==0 && pB->n ){ for(i=0; i<pB->n && diff_isspace(zB[i]); i++){} pB->indent = i; for(j=pB->n-1; j>i && diff_isspace(zB[j]); j--){} pB->nw = j - i + 1; } zB += pB->indent; nB = pB->nw; if( nA>250 ) nA = 250; if( nB>250 ) nB = 250; avg = (nA+nB)/2; if( avg==0 ) return 0; nMin = nA; if( nB<nMin ) nMin = nB; if( nMin==0 ) return 68; |
︙ | ︙ | |||
1783 1784 1785 1786 1787 1788 1789 | c = (unsigned char)zA[i]; for(j=aFirst[c]; j<nB-best && memcmp(&zA[i],&zB[j],best)==0; j = aNext[j]){ int limit = minInt(nA-i, nB-j); for(k=best; k<=limit && zA[k+i]==zB[k+j]; k++){} if( k>best ) best = k; } } | | | 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 | c = (unsigned char)zA[i]; for(j=aFirst[c]; j<nB-best && memcmp(&zA[i],&zB[j],best)==0; j = aNext[j]){ int limit = minInt(nA-i, nB-j); for(k=best; k<=limit && zA[k+i]==zB[k+j]; k++){} if( k>best ) best = k; } } score = 5 + ((best>=avg) ? 0 : (avg - best)*95/avg); #if 0 fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n", nA, zA+1, nB, zB+1, best, avg, score); #endif /* Return the result */ |
︙ | ︙ | |||
1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 | a.z = g.argv[2]; a.n = (int)strlen(a.z); b.z = g.argv[3]; b.n = (int)strlen(b.z); x = match_dline(&a, &b); fossil_print("%d\n", x); } /* ** The threshold at which diffBlockAlignment transitions from the ** O(N*N) Wagner minimum-edit-distance algorithm to a less process ** O(NlogN) divide-and-conquer approach. */ #define DIFF_ALIGN_MX 1225 | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 | a.z = g.argv[2]; a.n = (int)strlen(a.z); b.z = g.argv[3]; b.n = (int)strlen(b.z); x = match_dline(&a, &b); fossil_print("%d\n", x); } /* Forward declarations for recursion */ static unsigned char *diffBlockAlignment( DLine *aLeft, int nLeft, /* Text on the left */ DLine *aRight, int nRight, /* Text on the right */ DiffConfig *pCfg, /* Configuration options */ int *pNResult /* OUTPUT: Bytes of result */ ); static void longestCommonSequence( DContext *p, /* Two files being compared */ int iS1, int iE1, /* Range of lines in p->aFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ); /* ** Make a copy of a list of nLine DLine objects from one array to ** another. Hash the new array to ignore whitespace. */ static void diffDLineXfer( DLine *aTo, const DLine *aFrom, int nLine ){ int i, j, k; u64 h, h2; for(i=0; i<nLine; i++) aTo[i].iHash = 0; for(i=0; i<nLine; i++){ const char *z = aFrom[i].z; int n = aFrom[i].n; for(j=0; j<n && diff_isspace(z[j]); j++){} aTo[i].z = &z[j]; for(k=aFrom[i].n; k>j && diff_isspace(z[k-1]); k--){} aTo[i].n = n = k-j; aTo[i].indent = 0; aTo[i].nw = 0; for(h=0; j<k; j++){ char c = z[j]; if( !diff_isspace(c) ){ h = (h^c)*9000000000000000041LL; } } aTo[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) | n; h2 = h % nLine; aTo[i].iNext = aTo[h2].iHash; aTo[h2].iHash = i+1; } } /* ** For a difficult diff-block alignment that was originally for ** the default consider-all-whitespace algorithm, try to find the ** longest common subsequence between the two blocks that involves ** only whitespace changes. */ static unsigned char *diffBlockAlignmentIgnoreSpace( DLine *aLeft, int nLeft, /* Text on the left */ DLine *aRight, int nRight, /* Text on the right */ DiffConfig *pCfg, /* Configuration options */ int *pNResult /* OUTPUT: Bytes of result */ ){ DContext dc; int iSX, iEX; /* Start and end of LCS on the left */ int iSY, iEY; /* Start and end of the LCS on the right */ unsigned char *a1, *a2; int n1, n2, nLCS; dc.aEdit = 0; dc.nEdit = 0; dc.nEditAlloc = 0; dc.nFrom = nLeft; dc.nTo = nRight; dc.xDiffer = compare_dline_ignore_allws; dc.aFrom = fossil_malloc( sizeof(DLine)*(nLeft+nRight) ); dc.aTo = &dc.aFrom[dc.nFrom]; diffDLineXfer(dc.aFrom, aLeft, nLeft); diffDLineXfer(dc.aTo, aRight, nRight); longestCommonSequence(&dc,0,nLeft,0,nRight,&iSX,&iEX,&iSY,&iEY); fossil_free(dc.aFrom); nLCS = iEX - iSX; if( nLCS<5 ) return 0; /* No good LCS was found */ if( pCfg->diffFlags & DIFF_DEBUG ){ fossil_print(" LCS size=%d\n" " [%.*s]\n" " [%.*s]\n", nLCS, aLeft[iSX].n, aLeft[iSX].z, aLeft[iEX-1].n, aLeft[iEX-1].z); } a1 = diffBlockAlignment(aLeft,iSX,aRight,iSY,pCfg,&n1); a2 = diffBlockAlignment(aLeft+iEX, nLeft-iEX, aRight+iEY, nRight-iEY, pCfg, &n2); a1 = fossil_realloc(a1, n1+nLCS+n2); memcpy(a1+n1+nLCS,a2,n2); memset(a1+n1,3,nLCS); fossil_free(a2); *pNResult = n1+n2+nLCS; return a1; } /* ** This is a helper route for diffBlockAlignment(). In this case, ** a very large block is encountered that might be too expensive to ** use the O(N*N) Wagner edit distance algorithm. So instead, this ** block implements a less-precise but faster O(N*logN) divide-and-conquer ** approach. */ static unsigned char *diffBlockAlignmentDivideAndConquer( DLine *aLeft, int nLeft, /* Text on the left */ DLine *aRight, int nRight, /* Text on the right */ DiffConfig *pCfg, /* Configuration options */ int *pNResult /* OUTPUT: Bytes of result */ ){ DLine *aSmall; /* The smaller of aLeft and aRight */ DLine *aBig; /* The larger of aLeft and aRight */ int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */ unsigned char *a1, *a2; /* Results of the alignments on two halves */ int n1, n2; /* Number of entries in a1 and a2 */ int score, bestScore; /* Score and best score seen so far */ int i; /* Loop counter */ if( nLeft>nRight ){ aSmall = aRight; nSmall = nRight; aBig = aLeft; nBig = nLeft; }else{ aSmall = aLeft; nSmall = nLeft; aBig = aRight; nBig = nRight; } iDivBig = nBig/2; iDivSmall = nSmall/2; if( pCfg->diffFlags & DIFF_DEBUG ){ fossil_print(" Divide at [%.*s]\n", aBig[iDivBig].n, aBig[iDivBig].z); } bestScore = 10000; for(i=0; i<nSmall; i++){ score = match_dline(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)*2; if( score<bestScore ){ bestScore = score; iDivSmall = i; } } if( aSmall==aRight ){ iDivRight = iDivSmall; iDivLeft = iDivBig; }else{ iDivRight = iDivBig; iDivLeft = iDivSmall; } a1 = diffBlockAlignment(aLeft,iDivLeft,aRight,iDivRight,pCfg,&n1); a2 = diffBlockAlignment(aLeft+iDivLeft, nLeft-iDivLeft, aRight+iDivRight, nRight-iDivRight, pCfg, &n2); a1 = fossil_realloc(a1, n1+n2 ); memcpy(a1+n1,a2,n2); fossil_free(a2); *pNResult = n1+n2; return a1; } /* ** The threshold at which diffBlockAlignment transitions from the ** O(N*N) Wagner minimum-edit-distance algorithm to a less process ** O(NlogN) divide-and-conquer approach. */ #define DIFF_ALIGN_MX 1225 |
︙ | ︙ | |||
1848 1849 1850 1851 1852 1853 1854 | ** Algorithm: Wagner's minimum edit-distance algorithm, modified by ** adding a cost to each match based on how well the two rows match ** each other. Insertion and deletion costs are 50. Match costs ** are between 0 and 100 where 0 is a perfect match 100 is a complete ** mismatch. */ static unsigned char *diffBlockAlignment( | | | | | | 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 | ** Algorithm: Wagner's minimum edit-distance algorithm, modified by ** adding a cost to each match based on how well the two rows match ** each other. Insertion and deletion costs are 50. Match costs ** are between 0 and 100 where 0 is a perfect match 100 is a complete ** mismatch. */ static unsigned char *diffBlockAlignment( DLine *aLeft, int nLeft, /* Text on the left */ DLine *aRight, int nRight, /* Text on the right */ DiffConfig *pCfg, /* Configuration options */ int *pNResult /* OUTPUT: Bytes of result */ ){ int i, j, k; /* Loop counters */ int *a; /* One row of the Wagner matrix */ int *pToFree; /* Space that needs to be freed */ unsigned char *aM; /* Wagner result matrix */ int nMatch, iMatch; /* Number of matching lines and match score */ int aBuf[100]; /* Stack space for a[] if nRight not to big */ |
︙ | ︙ | |||
1873 1874 1875 1876 1877 1878 1879 | if( nRight==0 ){ aM = fossil_malloc( nLeft + 2 ); memset(aM, 1, nLeft); *pNResult = nLeft; return aM; } | > | > > > | > | < < < < < | | < < < < < | < < < < < | < < < < < < < < < < < | < < < < < < | | < < < < < < | 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 | if( nRight==0 ){ aM = fossil_malloc( nLeft + 2 ); memset(aM, 1, nLeft); *pNResult = nLeft; return aM; } if( pCfg->diffFlags & DIFF_DEBUG ){ fossil_print("BlockAlignment:\n [%.*s] + %d\n [%.*s] + %d\n", aLeft[0].n, aLeft[0].z, nLeft, aRight[0].n, aRight[0].z, nRight); } /* For large alignments, try to use alternative algorithms that are ** faster than the O(N*N) Wagner edit distance. */ if( nLeft*nRight>DIFF_ALIGN_MX && (pCfg->diffFlags & DIFF_SLOW_SBS)==0 ){ if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==0 ){ unsigned char *aRes; aRes = diffBlockAlignmentIgnoreSpace( aLeft, nLeft,aRight, nRight,pCfg,pNResult); if( aRes ) return aRes; } return diffBlockAlignmentDivideAndConquer( aLeft, nLeft,aRight, nRight,pCfg,pNResult); } /* If we reach this point, we will be doing an O(N*N) Wagner minimum ** edit distance to compute the alignment. */ if( nRight < count(aBuf)-1 ){ pToFree = 0; |
︙ | ︙ | |||
2024 2025 2026 2027 2028 2029 2030 | ** Format a diff using a DiffBuilder object */ static void formatDiff( DContext *p, /* The computed diff */ DiffConfig *pCfg, /* Configuration options */ DiffBuilder *pBuilder /* The formatter object */ ){ | | | | 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 | ** Format a diff using a DiffBuilder object */ static void formatDiff( DContext *p, /* The computed diff */ DiffConfig *pCfg, /* Configuration options */ DiffBuilder *pBuilder /* The formatter object */ ){ DLine *A; /* Left side of the diff */ DLine *B; /* Right side of the diff */ unsigned int a = 0; /* Index of next line in A[] */ unsigned int b = 0; /* Index of next line in B[] */ const int *R; /* Array of COPY/DELETE/INSERT triples */ unsigned int r; /* Index into R[] */ unsigned int nr; /* Number of COPY/DELETE/INSERT triples to process */ unsigned int mxr; /* Maximum value for r */ unsigned int na, nb; /* Number of lines shown from A and B */ |
︙ | ︙ | |||
2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 | expandEdit(p, p->nEdit*2 + 15); if( p->aEdit==0 ) return; } p->aEdit[p->nEdit++] = nCopy; p->aEdit[p->nEdit++] = nDel; p->aEdit[p->nEdit++] = nIns; } /* ** Do a single step in the difference. Compute a sequence of ** copy/delete/insert steps that will convert lines iS1 through iE1-1 of ** the input into lines iS2 through iE2-1 of the output and write ** that sequence into the difference context. ** | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 | expandEdit(p, p->nEdit*2 + 15); if( p->aEdit==0 ) return; } p->aEdit[p->nEdit++] = nCopy; p->aEdit[p->nEdit++] = nDel; p->aEdit[p->nEdit++] = nIns; } /* ** A common subsequene between p->aFrom and p->aTo has been found. ** This routine tries to judge if the subsequence really is a valid ** match or rather is just an artifact of an indentation change. ** ** Return non-zero if the subsequence is valid. Return zero if the ** subsequence seems likely to be an editing artifact and should be ** ignored. ** ** This routine is a heuristic optimization intended to give more ** intuitive diff results following an indentation change it code that ** is formatted similarly to C/C++, Javascript, Go, TCL, and similar ** languages that use {...} for nesting. A correct diff is computed ** even if this routine always returns true (non-zero). But sometimes ** a more intuitive diff can result if this routine returns false. ** ** The subsequences consists of the rows iSX through iEX-1 (inclusive) ** in p->aFrom[]. The total sequences is iS1 through iE1-1 (inclusive) ** of p->aFrom[]. ** ** Example where this heuristic is useful, see the diff at ** https://www.sqlite.org/src/fdiff?v1=0e79dd15cbdb4f48&v2=33955a6fd874dd97 ** ** See also discussion at https://fossil-scm.org/forum/forumpost/9ba3284295 ** ** ALGORITHM (subject to change and refinement): ** ** 1. If the subsequence is larger than 1/7th of the original span, ** then consider it valid. --> return 1 ** ** 2. If the subsequence contains any charaters other than '}', '{", ** or whitespace, then consider it valid. --> return 1 ** ** 3. Otherwise, it is potentially an artifact of an indentation ** change. --> return 0 */ static int likelyNotIndentChngArtifact( DContext *p, /* The complete diff context */ int iS1, /* Start of the main segment */ int iSX, /* Start of the subsequence */ int iEX, /* First row past the end of the subsequence */ int iE1 /* First row past the end of the main segment */ ){ int i, j; if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1; for(i=iSX; i<iEX; i++){ const char *z = p->aFrom[i].z; for(j=p->aFrom[i].n-1; j>=0; j--){ char c = z[j]; if( c!='}' && c!='{' && !diff_isspace(c) ) return 1; } } return 0; } /* ** Do a single step in the difference. Compute a sequence of ** copy/delete/insert steps that will convert lines iS1 through iE1-1 of ** the input into lines iS2 through iE2-1 of the output and write ** that sequence into the difference context. ** |
︙ | ︙ | |||
2440 2441 2442 2443 2444 2445 2446 | appendTriple(p, 0, iE1-iS1, 0); return; } /* Find the longest matching segment between the two sequences */ longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY); | | > > | 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 | appendTriple(p, 0, iE1-iS1, 0); return; } /* Find the longest matching segment between the two sequences */ longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY); if( iEX>iSX+5 || (iEX>iSX && likelyNotIndentChngArtifact(p,iS1,iSX,iEX,iE1) ) ){ /* A common segment has been found. ** Recursively diff either side of the matching segment */ diff_step(p, iS1, iSX, iS2, iSY); if( iEX>iSX ){ appendTriple(p, iEX - iSX, 0, 0); } diff_step(p, iEX, iE1, iEY, iE2); |
︙ | ︙ |