Fossil: Check-in [1cb182ac]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	Diff algorithm is slightly faster and does a better job of dealing with indentation changes in code. See forum thread 7631656a2823338a.
Downloads:	Tarball \| ZIP archive \| SQL archive
Timelines:	family \| ancestors \| descendants \| both \| trunk
Files:	files \| file ages \| folders
SHA3-256:	1cb182ac18de0bb78c060c9641ba87b08b56a532ee71f04b2718747123942260
User & Date:	drh 2022-01-23 20:11:50

Context

2022-01-24
06:54		Replaced the "manual" TLS EOF tracking with BIO_eof(), analogous to how it is done in althttpd. ... (check-in: 06e300e5 user: stephan tags: trunk)
2022-01-23
20:11		Diff algorithm is slightly faster and does a better job of dealing with indentation changes in code. See forum thread 7631656a2823338a. ... (check-in: 1cb182ac user: drh tags: trunk)
19:57		Add a heuristic to the diff generator that helps it do a better job of identifying differences in C code that result from a change in indentation level. ... (Closed-Leaf check-in: 8cd73dda user: drh tags: diff-improvement)
12:52		Fix bullets in wsl_caveats.wiki ... (check-in: ea6b2d3e user: larrybr tags: trunk)

Changes

Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/diff.c.

︙			︙
119 120 121 122 123 124 125 ~~126~~ 127 128 129 130 131 132 133 134	of the line. If any line is longer than LENGTH_MASK characters, the file is considered binary. / typedef struct DLine DLine; struct DLine { const char z; /* The text of the line / u64 h; / Hash of the line / ~~unsigned short indent; / Indent of t~~he line. Only !=0 with -w/-Z opti~~on /~~ unsigned short n; / number of bytes / unsigned int iNext; / 1+(Index of next line with same the same hash) / / an array of DLine elements serves two purposes. The fields above are one per line of input text. But each entry is also a bucket in a hash table, as follows: / unsigned int iHash; / 1+(first entry in the hash chain) */ };	\| >	119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135	of the line. If any line is longer than LENGTH_MASK characters, the file is considered binary. / typedef struct DLine DLine; struct DLine { const char z; /* The text of the line / u64 h; / Hash of the line / unsigned short indent; / Index of first non-space / unsigned short n; / number of bytes / unsigned short nw; / number of bytes without leading/trailing space / unsigned int iNext; / 1+(Index of next line with same the same hash) / / an array of DLine elements serves two purposes. The fields above are one per line of input text. But each entry is also a bucket in a hash table, as follows: / unsigned int iHash; / 1+(first entry in the hash chain) */ };
︙			︙
161 162 163 164 165 166 167 ~~168~~ 169 170 171 172 173 174 175 176	int aEdit; / Array of copy/delete/insert triples / int nEdit; / Number of integers (3x num of triples) in aEdit[] / int nEditAlloc; / Space allocated for aEdit[] / DLine aFrom; /* File on left side of the diff / int nFrom; / Number of lines in aFrom[] / DLine aTo; /* File on right side of the diff / int nTo; / Number of lines in aTo[] / ~~int (xDiffer)(const DLine,const DLine); /* comparison function /~~ }; / Count the number of lines in the input string. Include the last line in the count even if it lacks the \n terminator. If an empty string is specified, the number of lines is zero. For the purposes of this function, a string is considered empty if it contains no characters ** -OR- it contains only NUL characters.	\| > > > > > > > > > > > > > > > > > > > > > >	162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199	int aEdit; / Array of copy/delete/insert triples / int nEdit; / Number of integers (3x num of triples) in aEdit[] / int nEditAlloc; / Space allocated for aEdit[] / DLine aFrom; /* File on left side of the diff / int nFrom; / Number of lines in aFrom[] / DLine aTo; /* File on right side of the diff / int nTo; / Number of lines in aTo[] / int (xDiffer)(const DLine ,const DLine ); /* comparison function / }; / Fast isspace for use by diff / static const char diffIsSpace[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; #define diff_isspace(X) (diffIsSpace[(unsigned char)(X)]) / Count the number of lines in the input string. Include the last line in the count even if it lacks the \n terminator. If an empty string is specified, the number of lines is zero. For the purposes of this function, a string is considered empty if it contains no characters ** -OR- it contains only NUL characters.
︙			︙
243 244 245 246 247 248 249 ~~250~~ 251 ~~252~~ 253 254 255 ~~256~~ 257 258 ~~259~~ 260 261 262 263 264 265 266 267 268 ~~269~~ 270 271 272 273 274 275 276 ~~277~~ 278 279 280 281 282 283 284	} a[i].z = z; k = nn; if( diffFlags & DIFF_STRIP_EOLCR ){ if( k>0 && z[k-1]=='\r' ){ k--; } } a[i].n = k; ~~s = 0;~~ if( diffFlags & DIFF_IGNORE_EOLWS ){ ~~while( k>0 && ~~fossil~~_isspace(z[k-1]) ){ k--; }~~ } if( (diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){ int numws = 0; ~~~~while~~( s<k && ~~fossil_isspace(~~z[s]~~) ){~~ s++; }~~ for(h=0, x=s; x<k; x++){ char c = z[x]; ~~if( ~~fossil~~_isspace(c) ){~~ ++numws; }else{ h = (h^c)9000000000000000041LL; } } k -= numws; }else{ int k2 = k & ~0x7; u64 m; ~~for(h=~~0, x=s~~; x<k2; x += 8){~~ memcpy(&m, z+x, 8); h = (h^m)9000000000000000041LL; } m = 0; memcpy(&m, z+x, k-k2); h ^= m; } ~~a[i].indent = s;~~ a[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) \| (k-s); h2 = h % nLine; a[i].iNext = a[h2].iHash; a[h2].iHash = i+1; z += nn+1; n -= nn+1; i++; }while( zNL[0]!='\0' && zNL[1]!='\0' );	< \| \| > > \| \| <	266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307	} a[i].z = z; k = nn; if( diffFlags & DIFF_STRIP_EOLCR ){ if( k>0 && z[k-1]=='\r' ){ k--; } } a[i].n = k; if( diffFlags & DIFF_IGNORE_EOLWS ){ while( k>0 && diff_isspace(z[k-1]) ){ k--; } } if( (diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){ int numws = 0; for(s=0; s<k && z[s]<=' '; s++){} a[i].indent = s; a[i].nw = k - s; for(h=0, x=s; x<k; x++){ char c = z[x]; if( diff_isspace(c) ){ ++numws; }else{ h = (h^c)9000000000000000041LL; } } k -= numws; }else{ int k2 = k & ~0x7; u64 m; for(h=x=s=0; x<k2; x += 8){ memcpy(&m, z+x, 8); h = (h^m)9000000000000000041LL; } m = 0; memcpy(&m, z+x, k-k2); h ^= m; } a[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) \| (k-s); h2 = h % nLine; a[i].iNext = a[h2].iHash; a[h2].iHash = i+1; z += nn+1; n -= nn+1; 
i++; }while( zNL[0]!='\0' && zNL[1]!='\0' );
︙			︙
298 299 300 301 302 303 304 ~~305~~ 306 ~~307~~ 308 309 310 ~~311 312~~ 313 314 315 316 317 318 319	} /* Return zero if two DLine elements are identical, ignoring all whitespace. The indent field of pA/pB already points ** to the first non-space character in the string. / static int compare_dline_ignore_allws(const DLine pA, const DLine pB){ ~~int a = pA->indent, b = pB->indent;~~ if( pA->h==pB->h ){ while( a<pA->n \|\| b<pB->n ){ if( a<pA->n && b<pB->n && pA->z[a++] != pB->z[b++] ) return 1; ~~while( a<pA->n && ~~fossil~~_isspace(pA->z[a])) ++a; while( b<pB->n && ~~fossil~~_isspace(pB->z[b])) ++b;~~ } return pA->n-a != pB->n-b; } return 1; } /	< < > > > > \| \|	321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344	} /* Return zero if two DLine elements are identical, ignoring all whitespace. The indent field of pA/pB already points ** to the first non-space character in the string. / static int compare_dline_ignore_allws(const DLine pA, const DLine pB){ if( pA->h==pB->h ){ int a, b; if( memcmp(pA->z, pB->z, pA->h&LENGTH_MASK)==0 ) return 0; a = pA->indent; b = pB->indent; while( a<pA->n \|\| b<pB->n ){ if( a<pA->n && b<pB->n && pA->z[a++] != pB->z[b++] ) return 1; while( a<pA->n && diff_isspace(pA->z[a])) ++a; while( b<pB->n && diff_isspace(pB->z[b])) ++b; } return pA->n-a != pB->n-b; } return 1; } /
︙			︙
336 337 338 339 340 341 342 ~~343~~ 344 345 346 347 348 349 350	/* ** Append a single line of context-diff output to pOut. / static void appendDiffLine( Blob pOut, /* Where to write the line of output / char cPrefix, / One of " ", "+", or "-" / ~~DLine pLine /* The line to be output /~~ ){ blob_append_char(pOut, cPrefix); blob_append(pOut, pLine->z, pLine->n); blob_append_char(pOut, '\n'); } /	\|	361 362 363 364 365 366 367 368 369 370 371 372 373 374 375	/* ** Append a single line of context-diff output to pOut. / static void appendDiffLine( Blob pOut, /* Where to write the line of output / char cPrefix, / One of " ", "+", or "-" / const DLine pLine /* The line to be output / ){ blob_append_char(pOut, cPrefix); blob_append(pOut, pLine->z, pLine->n); blob_append_char(pOut, '\n'); } /
︙			︙
369 370 371 372 373 374 375 ~~376 377 378 379~~ 380 381 382 383 384 385 386	** Output a patch-style text diff. / static void contextDiff( DContext p, /* The difference / Blob pOut, /* Output a context diff to here / DiffConfig pCfg /* Configuration options / ){ ~~DLine A; /* Left side of the diff / DLine B; /* Right side of the diff / int a = 0; / Index of next line in A[] / int b = 0; / Index of next line in B[] /~~ int R; /* Array of COPY/DELETE/INSERT triples / int r; / Index into R[] / int nr; / Number of COPY/DELETE/INSERT triples to process / int mxr; / Maximum value for r / int na, nb; / Number of lines shown from A and B / int i, j; / Loop counters / int m; / Number of lines to output */	\| \| \| \|	394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411	** Output a patch-style text diff. / static void contextDiff( DContext p, /* The difference / Blob pOut, /* Output a context diff to here / DiffConfig pCfg /* Configuration options / ){ const DLine A; /* Left side of the diff / const DLine B; /* Right side of the diff / int a = 0; / Index of next line in A[] / int b = 0; / Index of next line in B[] / int R; /* Array of COPY/DELETE/INSERT triples / int r; / Index into R[] / int nr; / Number of COPY/DELETE/INSERT triples to process / int mxr; / Maximum value for r / int na, nb; / Number of lines shown from A and B / int i, j; / Loop counters / int m; / Number of lines to output */
︙			︙
617 618 619 620 621 622 623 ~~624~~ 625 626 627 628 629 630 631	} /* ** Return true if the string starts with n spaces / static int allSpaces(const char z, int n){ int i; ~~for(i=0; i<n && ~~fossil~~_isspace(z[i]); i++){}~~ return i==n; } /* Try to improve the human-readability of the LineChange p. ** (1) If the first change span shows a change of indentation, try to	\|	642 643 644 645 646 647 648 649 650 651 652 653 654 655 656	} /* ** Return true if the string starts with n spaces / static int allSpaces(const char z, int n){ int i; for(i=0; i<n && diff_isspace(z[i]); i++){} return i==n; } /* Try to improve the human-readability of the LineChange p. ** (1) If the first change span shows a change of indentation, try to
︙			︙
743 744 745 746 747 748 749 ~~750~~ 751 752 753 754 755 ~~756~~ 757 758 759 760 761 762 763	int iBestVal = -1; int i; int nLong = nLeft<nRight ? nRight : nLeft; int nGap = nLong - nShort; for(i=nShort-nSuffix; i<=nPrefix; i++){ int iVal = 0; char c = zLeft[i]; ~~if( ~~fossil~~_isspace(c) ){~~ iVal += 5; }else if( !fossil_isalnum(c) ){ iVal += 2; } c = zLeft[i+nGap-1]; ~~if( ~~fossil~~_isspace(c) ){~~ iVal += 5; }else if( !fossil_isalnum(c) ){ iVal += 2; } if( iVal>iBestVal ){ iBestVal = iVal; iBest = i;	\| \|	768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788	int iBestVal = -1; int i; int nLong = nLeft<nRight ? nRight : nLeft; int nGap = nLong - nShort; for(i=nShort-nSuffix; i<=nPrefix; i++){ int iVal = 0; char c = zLeft[i]; if( diff_isspace(c) ){ iVal += 5; }else if( !fossil_isalnum(c) ){ iVal += 2; } c = zLeft[i+nGap-1]; if( diff_isspace(c) ){ iVal += 5; }else if( !fossil_isalnum(c) ){ iVal += 2; } if( iVal>iBestVal ){ iBestVal = iVal; iBest = i;
︙			︙
887 888 889 890 891 892 893 ~~894~~ 895 896 897 898 899 900 901	/ typedef struct DiffBuilder DiffBuilder; struct DiffBuilder { void (xSkip)(DiffBuilder, unsigned int, int); void (xCommon)(DiffBuilder,const DLine); void (xInsert)(DiffBuilder,const DLine); void (xDelete)(DiffBuilder,const DLine); ~~void (xReplace)(DiffBuilder,const DLine, const DLine);~~ void (xEdit)(DiffBuilder,const DLine,const DLine); void (xEnd)(DiffBuilder); unsigned int lnLeft; /* Lines seen on the left (delete) side / unsigned int lnRight; / Lines seen on the right (insert) side / unsigned int nPending; / Number of pending lines / int eState; / State of the output / int width; / Display width */	\|	912 913 914 915 916 917 918 919 920 921 922 923 924 925 926	/ typedef struct DiffBuilder DiffBuilder; struct DiffBuilder { void (xSkip)(DiffBuilder, unsigned int, int); void (xCommon)(DiffBuilder,const DLine); void (xInsert)(DiffBuilder,const DLine); void (xDelete)(DiffBuilder,const DLine); void (xReplace)(DiffBuilder,const DLine,const DLine); void (xEdit)(DiffBuilder,const DLine,const DLine); void (xEnd)(DiffBuilder); unsigned int lnLeft; /* Lines seen on the left (delete) side / unsigned int lnRight; / Lines seen on the right (insert) side / unsigned int nPending; / Number of pending lines / int eState; / State of the output / int width; / Display width */
︙			︙
1731 1732 1733 1734 1735 1736 1737 ~~1738~~ 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 ~~1755~~ ~~1756 1757 1758 1759 1760~~ 1761 1762 1763 1764 1765 1766 1767	(1) Remove leading and trailing whitespace. (2) Truncate both strings to at most 250 characters (3) If the two strings have a common prefix, measure that prefix (4) Find the length of the longest common subsequence that is at least 150% longer than the common prefix. (5) Longer common subsequences yield lower scores. / ~~static int match_dline(~~const~~ DLine pA, ~~const~~ DLine pB){~~ const char zA; /* Left string / const char zB; /* right string / int nA; / Bytes in zA[] / int nB; / Bytes in zB[] / int nMin; int nPrefix; int avg; / Average length of A and B / int i, j, k; / Loop counters / int best = 0; / Longest match found so far / int score; / Final score. 0..100 / unsigned char c; / Character being examined / unsigned char aFirst[256]; / aFirst[X] = index in zB[] of first char X / unsigned char aNext[252]; / aNext[i] = index in zB[] of next zB[i] char */ zA = pA->z; zB = pB->z; ~~nA = ~~pA->n~~;~~ nB = pB->n; ~~while( nA>0 && (unsigned char)zA[0]<=' ' ){ nA--; zA++; }~~ ~~while( nA>0 && (unsigned char)zA[nA-1]<=' ' ){ nA--; }~~ ~~while( nB>0 && (unsigned char)zB[0]<=' ' ){ nB--; zB++; }~~ ~~while( nB>0 && (unsigned char)zB[nB-1]<=' ' ){ nB--; }~~ if( nA>250 ) nA = 250; if( nB>250 ) nB = 250; avg = (nA+nB)/2; if( avg==0 ) return 0; nMin = nA; if( nB<nMin ) nMin = nB; if( nMin==0 ) return 68;	\| > > > > > > > > > > > \| > > > > \| \| < < <	1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804	(1) Remove leading and trailing whitespace. 
(2) Truncate both strings to at most 250 characters (3) If the two strings have a common prefix, measure that prefix (4) Find the length of the longest common subsequence that is at least 150% longer than the common prefix. (5) Longer common subsequences yield lower scores. / static int match_dline(DLine pA, DLine pB){ const char zA; /* Left string / const char zB; /* right string / int nA; / Bytes in zA[] / int nB; / Bytes in zB[] / int nMin; int nPrefix; int avg; / Average length of A and B / int i, j, k; / Loop counters / int best = 0; / Longest match found so far / int score; / Final score. 0..100 / unsigned char c; / Character being examined / unsigned char aFirst[256]; / aFirst[X] = index in zB[] of first char X / unsigned char aNext[252]; / aNext[i] = index in zB[] of next zB[i] char */ zA = pA->z; if( pA->nw==0 && pA->n ){ for(i=0; i<pA->n && diff_isspace(zA[i]); i++){} pA->indent = i; for(j=pA->n-1; j>i && diff_isspace(zA[j]); j--){} pA->nw = j - i + 1; } zA += pA->indent; nA = pA->nw; zB = pB->z; if( pB->nw==0 && pB->n ){ for(i=0; i<pB->n && diff_isspace(zB[i]); i++){} pB->indent = i; for(j=pB->n-1; j>i && diff_isspace(zB[j]); j--){} pB->nw = j - i + 1; } zB += pB->indent; nB = pB->nw; if( nA>250 ) nA = 250; if( nB>250 ) nB = 250; avg = (nA+nB)/2; if( avg==0 ) return 0; nMin = nA; if( nB<nMin ) nMin = nB; if( nMin==0 ) return 68;
︙			︙
1783 1784 1785 1786 1787 1788 1789 ~~1790~~ 1791 1792 1793 1794 1795 1796 1797	c = (unsigned char)zA[i]; for(j=aFirst[c]; j<nB-best && memcmp(&zA[i],&zB[j],best)==0; j = aNext[j]){ int limit = minInt(nA-i, nB-j); for(k=best; k<=limit && zA[k+i]==zB[k+j]; k++){} if( k>best ) best = k; } } ~~score = (best>=avg) ? 0 : (avg - best)~~100~~/avg;~~ #if 0 fprintf(stderr, "A: [%.s]\nB: [%.s]\nbest=%d avg=%d score=%d\n", nA, zA+1, nB, zB+1, best, avg, score); #endif / Return the result */	\|	1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834	c = (unsigned char)zA[i]; for(j=aFirst[c]; j<nB-best && memcmp(&zA[i],&zB[j],best)==0; j = aNext[j]){ int limit = minInt(nA-i, nB-j); for(k=best; k<=limit && zA[k+i]==zB[k+j]; k++){} if( k>best ) best = k; } } score = 5 + ((best>=avg) ? 0 : (avg - best)95/avg); #if 0 fprintf(stderr, "A: [%.s]\nB: [%.s]\nbest=%d avg=%d score=%d\n", nA, zA+1, nB, zB+1, best, avg, score); #endif / Return the result */
︙			︙
1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828	a.z = g.argv[2]; a.n = (int)strlen(a.z); b.z = g.argv[3]; b.n = (int)strlen(b.z); x = match_dline(&a, &b); fossil_print("%d\n", x); } /* The threshold at which diffBlockAlignment transitions from the O(NN) Wagner minimum-edit-distance algorithm to a less process * O(NlogN) divide-and-conquer approach. */ #define DIFF_ALIGN_MX 1225	> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > >	1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038	a.z = g.argv[2]; a.n = (int)strlen(a.z); b.z = g.argv[3]; b.n = (int)strlen(b.z); x = match_dline(&a, &b); fossil_print("%d\n", x); } /* Forward declarations for recursion / static unsigned char diffBlockAlignment( DLine aLeft, int nLeft, / Text on the left / DLine aRight, int nRight, /* Text on the right / 
DiffConfig pCfg, /* Configuration options / int pNResult /* OUTPUT: Bytes of result / ); static void longestCommonSequence( DContext p, /* Two files being compared / int iS1, int iE1, / Range of lines in p->aFrom[] / int iS2, int iE2, / Range of lines in p->aTo[] / int piSX, int piEX, / Write p->aFrom[] common segment here / int piSY, int piEY / Write p->aTo[] common segment here / ); / Make a copy of a list of nLine DLine objects from one array to another. Hash the new array to ignore whitespace. / static void diffDLineXfer( DLine aTo, const DLine aFrom, int nLine ){ int i, j, k; u64 h, h2; for(i=0; i<nLine; i++) aTo[i].iHash = 0; for(i=0; i<nLine; i++){ const char z = aFrom[i].z; int n = aFrom[i].n; for(j=0; j<n && diff_isspace(z[j]); j++){} aTo[i].z = &z[j]; for(k=aFrom[i].n; k>j && diff_isspace(z[k-1]); k--){} aTo[i].n = n = k-j; aTo[i].indent = 0; aTo[i].nw = 0; for(h=0; j<k; j++){ char c = z[j]; if( !diff_isspace(c) ){ h = (h^c)9000000000000000041LL; } } aTo[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) \| n; h2 = h % nLine; aTo[i].iNext = aTo[h2].iHash; aTo[h2].iHash = i+1; } } / For a difficult diff-block alignment that was originally for the default consider-all-whitespace algorithm, try to find the longest common subsequence between the two blocks that involves only whitespace changes. 
/ static unsigned char diffBlockAlignmentIgnoreSpace( DLine aLeft, int nLeft, / Text on the left / DLine aRight, int nRight, /* Text on the right / DiffConfig pCfg, /* Configuration options / int pNResult /* OUTPUT: Bytes of result / ){ DContext dc; int iSX, iEX; / Start and end of LCS on the left / int iSY, iEY; / Start and end of the LCS on the right / unsigned char a1, a2; int n1, n2, nLCS; dc.aEdit = 0; dc.nEdit = 0; dc.nEditAlloc = 0; dc.nFrom = nLeft; dc.nTo = nRight; dc.xDiffer = compare_dline_ignore_allws; dc.aFrom = fossil_malloc( sizeof(DLine)(nLeft+nRight) ); dc.aTo = &dc.aFrom[dc.nFrom]; diffDLineXfer(dc.aFrom, aLeft, nLeft); diffDLineXfer(dc.aTo, aRight, nRight); longestCommonSequence(&dc,0,nLeft,0,nRight,&iSX,&iEX,&iSY,&iEY); fossil_free(dc.aFrom); nLCS = iEX - iSX; if( nLCS<5 ) return 0; /* No good LCS was found / if( pCfg->diffFlags & DIFF_DEBUG ){ fossil_print(" LCS size=%d\n" " [%.s]\n" " [%.s]\n", nLCS, aLeft[iSX].n, aLeft[iSX].z, aLeft[iEX-1].n, aLeft[iEX-1].z); } a1 = diffBlockAlignment(aLeft,iSX,aRight,iSY,pCfg,&n1); a2 = diffBlockAlignment(aLeft+iEX, nLeft-iEX, aRight+iEY, nRight-iEY, pCfg, &n2); a1 = fossil_realloc(a1, n1+nLCS+n2); memcpy(a1+n1+nLCS,a2,n2); memset(a1+n1,3,nLCS); fossil_free(a2); pNResult = n1+n2+nLCS; return a1; } /* This is a helper route for diffBlockAlignment(). In this case, a very large block is encountered that might be too expensive to ** use the O(NN) Wagner edit distance algorithm. So instead, this * block implements a less-precise but faster O(NlogN) divide-and-conquer * approach. / static unsigned char diffBlockAlignmentDivideAndConquer( DLine aLeft, int nLeft, / Text on the left / DLine aRight, int nRight, /* Text on the right / DiffConfig pCfg, /* Configuration options / int pNResult /* OUTPUT: Bytes of result / ){ DLine aSmall; /* The smaller of aLeft and aRight / DLine aBig; /* The larger of aLeft and aRight / int nSmall, nBig; / Size of aSmall and aBig. 
nSmall<=nBig / int iDivSmall, iDivBig; / Divider point for aSmall and aBig / int iDivLeft, iDivRight; / Divider point for aLeft and aRight / unsigned char a1, a2; / Results of the alignments on two halves / int n1, n2; / Number of entries in a1 and a2 / int score, bestScore; / Score and best score seen so far / int i; / Loop counter / if( nLeft>nRight ){ aSmall = aRight; nSmall = nRight; aBig = aLeft; nBig = nLeft; }else{ aSmall = aLeft; nSmall = nLeft; aBig = aRight; nBig = nRight; } iDivBig = nBig/2; iDivSmall = nSmall/2; if( pCfg->diffFlags & DIFF_DEBUG ){ fossil_print(" Divide at [%.s]\n", aBig[iDivBig].n, aBig[iDivBig].z); } bestScore = 10000; for(i=0; i<nSmall; i++){ score = match_dline(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)2; if( score<bestScore ){ bestScore = score; iDivSmall = i; } } if( aSmall==aRight ){ iDivRight = iDivSmall; iDivLeft = iDivBig; }else{ iDivRight = iDivBig; iDivLeft = iDivSmall; } a1 = diffBlockAlignment(aLeft,iDivLeft,aRight,iDivRight,pCfg,&n1); a2 = diffBlockAlignment(aLeft+iDivLeft, nLeft-iDivLeft, aRight+iDivRight, nRight-iDivRight, pCfg, &n2); a1 = fossil_realloc(a1, n1+n2 ); memcpy(a1+n1,a2,n2); fossil_free(a2); pNResult = n1+n2; return a1; } /* The threshold at which diffBlockAlignment transitions from the O(NN) Wagner minimum-edit-distance algorithm to a less process * O(NlogN) divide-and-conquer approach. */ #define DIFF_ALIGN_MX 1225
︙			︙
1848 1849 1850 1851 1852 1853 1854 ~~1855 1856 1857 1858~~ 1859 1860 1861 1862 1863 1864 1865	Algorithm: Wagner's minimum edit-distance algorithm, modified by adding a cost to each match based on how well the two rows match each other. Insertion and deletion costs are 50. Match costs are between 0 and 100 where 0 is a perfect match 100 is a complete ** mismatch. / static unsigned char diffBlockAlignment( ~~const~~ DLine aLeft, int nLeft, / Text on the left / ~~const~~ DLine aRight, int nRight, /* Text on the right / DiffConfig pCfg, /* Configuration options / int pNResult /* OUTPUT: Bytes of result / ){ int i, j, k; / Loop counters / int a; /* One row of the Wagner matrix / int pToFree; /* Space that needs to be freed / unsigned char aM; /* Wagner result matrix / int nMatch, iMatch; / Number of matching lines and match score / int aBuf[100]; / Stack space for a[] if nRight not to big */	\| \| \| \|	2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075	Algorithm: Wagner's minimum edit-distance algorithm, modified by adding a cost to each match based on how well the two rows match each other. Insertion and deletion costs are 50. Match costs are between 0 and 100 where 0 is a perfect match 100 is a complete ** mismatch. / static unsigned char diffBlockAlignment( DLine aLeft, int nLeft, / Text on the left / DLine aRight, int nRight, /* Text on the right / DiffConfig pCfg, /* Configuration options / int pNResult /* OUTPUT: Bytes of result / ){ int i, j, k; / Loop counters / int a; /* One row of the Wagner matrix / int pToFree; /* Space that needs to be freed / unsigned char aM; /* Wagner result matrix / int nMatch, iMatch; / Number of matching lines and match score / int aBuf[100]; / Stack space for a[] if nRight not to big */
︙			︙
1873 1874 1875 1876 1877 1878 1879 ~~1880~~ ~~1881~~ ~~1882 1883~~ 1884 1885 ~~1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920~~ 1921 ~~1922 1923 1924 1925 1926 1927 1928 1929 1930~~ 1931 1932 1933 1934 1935 1936 1937	if( nRight==0 ){ aM = fossil_malloc( nLeft + 2 ); memset(aM, 1, nLeft); pNResult = nLeft; return aM; } ~~/ For ~~large a~~lignments, ~~use a divide and conquer algorithm that is~~~~ O(NlogN). The result is not as precise, but this whole thing is an ~~approximation anyhow, and the~~ faster ~~response time is~~ an acce~~ptable~~ ** trade-off for reduced precision. / if( nLeftnRight>DIFF_ALIGN_MX && (pCfg->diffFlags & DIFF_SLOW_SBS)==0 ){ ~~const DLine aSmall; / The smaller of aLeft and aRight /~~ ~~const DLine aBig; /* The larger of aLeft and aRight /~~ ~~int nSmall, nBig; / Size of aSmall and aBig. nSmall<=nBig /~~ ~~int iDivSmall, iDivBig; / Divider point for aSmall and aBig /~~ int i~~DivLe~~ft, iDivRight; / D~~ivider~~ ~~point for aLeft and aRight /~~ unsigned char a~~1, a2; / Results of the alignments on two halves /~~ ~~int n1, n2; / Number of entries in a1 and a2 /~~ ~~int score, bestScore; / Score and best score seen so far /~~ ~~if( nLeft>nRight ){~~ ~~aSmall = aRight;~~ ~~nSmall = nRight;~~ a~~Big~~ = a~~Left;~~ ~~nBig = nLeft;~~ ~~}else{~~ ~~aSmall = aLeft;~~ ~~nSmall = nLeft;~~ ~~aBig = aRight;~~ nBig = nRight; } ~~iDivBig = nBig/2;~~ ~~iDivSmall = nSmall/2;~~ ~~bestScore = 10000;~~ ~~for(i=0; i<nSmall; i++){~~ ~~score = match_dline(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)2;~~ ~~if( score<bestScore ){~~ ~~bestScore = score;~~ ~~iDivSmall = i;~~ } } if( a~~Small==~~aR~~ight ){~~ ~~iDivRight = iDivSmall;~~ ~~iDivLeft = iDivBig;~~ ~~}else{~~ ~~iDivRight = iDivBig;~~ ~~iDivLeft = iDivSmall;~~ } ~~a1 = diffBlockAlignment(aLeft,iDivLeft,aRight,iDivRight,pCfg,&n1);~~ ~~a2 =~~ diffBlockAlignment~~(aLeft+i~~DivLe~~ft,~~ 
nLe~~ft-iDivLeft,~~ a~~Right+iDiv~~Right, nRight~~-iDiv~~R~~ight,~~ ~~pCfg, &n2);~~ ~~a1 = fossil_realloc(a1, n1+n2 );~~ ~~memcpy(a1+n1,a2,n2);~~ ~~fossil_free(a2);~~ pNResult = n1+n2; ~~return a1;~~ } / If we reach this point, we will be doing an O(NN) Wagner minimum * edit distance to compute the alignment. */ if( nRight < count(aBuf)-1 ){ pToFree = 0;	> \| > > > \| > \| < < < < < \| \| < < < < < \| < < < < < \| < < < < < < < < < < < \| < < < < < < \| \| < < < < < <	2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114	if( nRight==0 ){ aM = fossil_malloc( nLeft + 2 ); memset(aM, 1, nLeft); pNResult = nLeft; return aM; } if( pCfg->diffFlags & DIFF_DEBUG ){ fossil_print("BlockAlignment:\n [%.s] + %d\n [%.s] + %d\n", aLeft[0].n, aLeft[0].z, nLeft, aRight[0].n, aRight[0].z, nRight); } / For large alignments, try to use alternative algorithms that are ** faster than the O(NN) Wagner edit distance. / if( nLeftnRight>DIFF_ALIGN_MX && (pCfg->diffFlags & DIFF_SLOW_SBS)==0 ){ if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==0 ){ unsigned char aRes; aRes = diffBlockAlignmentIgnoreSpace( aLeft, nLeft,aRight, nRight,pCfg,pNResult); if( aRes ) return aRes; } return diffBlockAlignmentDivideAndConquer( aLeft, nLeft,aRight, nRight,pCfg,pNResult); } /* If we reach this point, we will be doing an O(NN) Wagner minimum * edit distance to compute the alignment. */ if( nRight < count(aBuf)-1 ){ pToFree = 0;
︙			︙
2024 2025 2026 2027 2028 2029 2030 ~~2031 2032~~ 2033 2034 2035 2036 2037 2038 2039	** Format a diff using a DiffBuilder object / static void formatDiff( DContext p, /* The computed diff / DiffConfig pCfg, /* Configuration options / DiffBuilder pBuilder /* The formatter object / ){ ~~~~const~~ DLine A; /* Left side of the diff / ~~const~~ DLine B; /* Right side of the diff /~~ unsigned int a = 0; / Index of next line in A[] / unsigned int b = 0; / Index of next line in B[] / const int R; /* Array of COPY/DELETE/INSERT triples / unsigned int r; / Index into R[] / unsigned int nr; / Number of COPY/DELETE/INSERT triples to process / unsigned int mxr; / Maximum value for r / unsigned int na, nb; / Number of lines shown from A and B */	\| \|	2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216	** Format a diff using a DiffBuilder object / static void formatDiff( DContext p, /* The computed diff / DiffConfig pCfg, /* Configuration options / DiffBuilder pBuilder /* The formatter object / ){ DLine A; /* Left side of the diff / DLine B; /* Right side of the diff / unsigned int a = 0; / Index of next line in A[] / unsigned int b = 0; / Index of next line in B[] / const int R; /* Array of COPY/DELETE/INSERT triples / unsigned int r; / Index into R[] / unsigned int nr; / Number of COPY/DELETE/INSERT triples to process / unsigned int mxr; / Maximum value for r / unsigned int na, nb; / Number of lines shown from A and B */
︙			︙
2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421	expandEdit(p, p->nEdit2 + 15); if( p->aEdit==0 ) return; } p->aEdit[p->nEdit++] = nCopy; p->aEdit[p->nEdit++] = nDel; p->aEdit[p->nEdit++] = nIns; } / Do a single step in the difference. Compute a sequence of copy/delete/insert steps that will convert lines iS1 through iE1-1 of the input into lines iS2 through iE2-1 of the output and write that sequence into the difference context. **	> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > >	2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654	expandEdit(p, p->nEdit2 + 15); if( p->aEdit==0 ) return; } p->aEdit[p->nEdit++] = nCopy; p->aEdit[p->nEdit++] = nDel; p->aEdit[p->nEdit++] = nIns; } / A common subsequene between p->aFrom and p->aTo has been found. This routine tries to judge if the subsequence really is a valid match or rather is just an artifact of an indentation change. Return non-zero if the subsequence is valid. Return zero if the subsequence seems likely to be an editing artifact and should be ignored. This routine is a heuristic optimization intended to give more intuitive diff results following an indentation change it code that is formatted similarly to C/C++, Javascript, Go, TCL, and similar languages that use {...} for nesting. A correct diff is computed even if this routine always returns true (non-zero). But sometimes a more intuitive diff can result if this routine returns false. The subsequences consists of the rows iSX through iEX-1 (inclusive) in p->aFrom[]. The total sequences is iS1 through iE1-1 (inclusive) of p->aFrom[]. 
Example where this heuristic is useful, see the diff at https://www.sqlite.org/src/fdiff?v1=0e79dd15cbdb4f48&v2=33955a6fd874dd97 See also discussion at https://fossil-scm.org/forum/forumpost/9ba3284295 ALGORITHM (subject to change and refinement): 1. If the subsequence is larger than 1/7th of the original span, then consider it valid. --> return 1 2. If the subsequence contains any characters other than '}', '{', or whitespace, then consider it valid. --> return 1 3. Otherwise, it is potentially an artifact of an indentation change. --> return 0 */ static int likelyNotIndentChngArtifact( DContext *p, /* The complete diff context */ int iS1, /* Start of the main segment */ int iSX, /* Start of the subsequence */ int iEX, /* First row past the end of the subsequence */ int iE1 /* First row past the end of the main segment */ ){ int i, j; if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1; for(i=iSX; i<iEX; i++){ const char *z = p->aFrom[i].z; for(j=p->aFrom[i].n-1; j>=0; j--){ char c = z[j]; if( c!='}' && c!='{' && !diff_isspace(c) ) return 1; } } return 0; } /* Do a single step in the difference. Compute a sequence of copy/delete/insert steps that will convert lines iS1 through iE1-1 of the input into lines iS2 through iE2-1 of the output and write that sequence into the difference context. **
︙			︙
2440 2441 2442 2443 2444 2445 2446 ~~2447~~ 2448 2449 2450 2451 2452 2453 2454	appendTriple(p, 0, iE1-iS1, 0); return; } /* Find the longest matching segment between the two sequences */ longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY); ~~if( iEX>iSX ){~~ /* A common segment has been found. ** Recursively diff either side of the matching segment */ diff_step(p, iS1, iSX, iS2, iSY); if( iEX>iSX ){ appendTriple(p, iEX - iSX, 0, 0); } diff_step(p, iEX, iE1, iEY, iE2);	\| > >	2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689	appendTriple(p, 0, iE1-iS1, 0); return; } /* Find the longest matching segment between the two sequences */ longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY); if( iEX>iSX+5 \|\| (iEX>iSX && likelyNotIndentChngArtifact(p,iS1,iSX,iEX,iE1) ) ){ /* A common segment has been found. ** Recursively diff either side of the matching segment */ diff_step(p, iS1, iSX, iS2, iSY); if( iEX>iSX ){ appendTriple(p, iEX - iSX, 0, 0); } diff_step(p, iEX, iE1, iEY, iE2);
︙			︙