Fossil

Artifact Content
Login

Artifact e09a3a97c8a7c06a3007180d3ee68e08d7efdaf9:


     1  /*
     2  ** Copyright (c) 2007 D. Richard Hipp
     3  **
     4  ** This program is free software; you can redistribute it and/or
     5  ** modify it under the terms of the Simplified BSD License (also
     6  ** known as the "2-Clause License" or "FreeBSD License".)
     7  
     8  ** This program is distributed in the hope that it will be useful,
     9  ** but without any warranty; without even the implied warranty of
    10  ** merchantability or fitness for a particular purpose.
    11  **
    12  ** Author contact information:
    13  **   drh@hwaci.com
    14  **   http://www.hwaci.com/drh/
    15  **
    16  *******************************************************************************
    17  **
    18  ** This file contains code used to compute a "diff" between two
    19  ** text files.
    20  */
    21  #include "config.h"
    22  #include "diff.h"
    23  #include <assert.h>
    24  
    25  
    26  #if INTERFACE
    27  /*
    28  ** Flag parameters to the text_diff() routine used to control the formatting
    29  ** of the diff output.
    30  */
    31  #define DIFF_CONTEXT_MASK ((u64)0x0000ffff) /* Lines of context. Default if 0 */
    32  #define DIFF_WIDTH_MASK   ((u64)0x00ff0000) /* side-by-side column width */
    33  #define DIFF_IGNORE_EOLWS ((u64)0x01000000) /* Ignore end-of-line whitespace */
    34  #define DIFF_SIDEBYSIDE   ((u64)0x02000000) /* Generate a side-by-side diff */
    35  #define DIFF_NEWFILE      ((u64)0x04000000) /* Missing shown as empty files */
    36  #define DIFF_BRIEF        ((u64)0x08000000) /* Show filenames only */
    37  #define DIFF_INLINE       ((u64)0x00000000) /* Inline (not side-by-side) diff */
    38  #define DIFF_HTML         ((u64)0x10000000) /* Render for HTML */
    39  #define DIFF_LINENO       ((u64)0x20000000) /* Show line numbers */
    40  #define DIFF_WS_WARNING   ((u64)0x40000000) /* Warn about whitespace */
    41  #define DIFF_NOOPT        (((u64)0x01)<<32) /* Suppress optimizations (debug) */
    42  #define DIFF_INVERT       (((u64)0x02)<<32) /* Invert the diff (debug) */
    43  #define DIFF_CONTEXT_EX   (((u64)0x04)<<32) /* Use context even if zero */
    44  #define DIFF_NOTTOOBIG    (((u64)0x08)<<32) /* Only display if not too big */
    45  
    46  /*
    47  ** These error messages are shared in multiple locations.  They are defined
    48  ** here for consistency.
    49  */
    50  #define DIFF_CANNOT_COMPUTE_BINARY \
    51      "cannot compute difference between binary files\n"
    52  
    53  #define DIFF_CANNOT_COMPUTE_SYMLINK \
    54      "cannot compute difference between symlink and regular file\n"
    55  
    56  #define DIFF_TOO_MANY_CHANGES_TXT \
    57      "more than 10,000 changes\n"
    58  
    59  #define DIFF_TOO_MANY_CHANGES_HTML \
    60      "<p class='generalError'>More than 10,000 changes</p>\n"
    61  
    62  /*
    63  ** This macro is designed to return non-zero if the specified blob contains
    64  ** data that MAY be binary in nature; otherwise, zero will be returned.
    65  */
    66  #define looks_like_binary(blob) \
    67      ((looks_like_utf8((blob), LOOK_BINARY) & LOOK_BINARY) != LOOK_NONE)
    68  
    69  /*
    70  ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
    71  ** to convey status information about the blob content.
    72  */
    73  #define LOOK_NONE    ((int)0x00000000) /* Nothing special was found. */
    74  #define LOOK_NUL     ((int)0x00000001) /* One or more NUL chars were found. */
    75  #define LOOK_CR      ((int)0x00000002) /* One or more CR chars were found. */
    76  #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */
    77  #define LOOK_LF      ((int)0x00000008) /* One or more LF chars were found. */
    78  #define LOOK_LONE_LF ((int)0x00000010) /* An unpaired LF char was found. */
    79  #define LOOK_CRLF    ((int)0x00000020) /* One or more CR/LF pairs were found. */
    80  #define LOOK_LONG    ((int)0x00000040) /* An over length line was found. */
    81  #define LOOK_ODD     ((int)0x00000080) /* An odd number of bytes was found. */
    82  #define LOOK_SHORT   ((int)0x00000100) /* Unable to perform full check. */
    83  #define LOOK_INVALID ((int)0x00000200) /* Invalid sequence was found. */
    84  #define LOOK_BINARY  (LOOK_NUL | LOOK_LONG | LOOK_SHORT) /* May be binary. */
    85  #define LOOK_EOL     (LOOK_LONE_CR | LOOK_LONE_LF | LOOK_CRLF) /* Line seps. */
    86  #endif /* INTERFACE */
    87  
    88  /*
    89  ** Maximum length of a line in a text file, in bytes.  (2**13 = 8192 bytes)
    90  */
    91  #define LENGTH_MASK_SZ  13
    92  #define LENGTH_MASK     ((1<<LENGTH_MASK_SZ)-1)
    93  
    94  /*
    95  ** Information about each line of a file being diffed.
    96  **
    97  ** The lower LENGTH_MASK_SZ bits of the hash (DLine.h) are the length
    98  ** of the line.  If any line is longer than LENGTH_MASK characters,
    99  ** the file is considered binary.
   100  */
   101  typedef struct DLine DLine;
   102  struct DLine {
   103    const char *z;        /* The text of the line */
   104    unsigned int h;       /* Hash of the line */
   105    unsigned int iNext;   /* 1+(Index of next line with same the same hash) */
   106  
   107    /* an array of DLine elements serves two purposes.  The fields
   108    ** above are one per line of input text.  But each entry is also
   109    ** a bucket in a hash table, as follows: */
   110    unsigned int iHash;   /* 1+(first entry in the hash chain) */
   111  };
   112  
   113  /*
   114  ** Length of a dline
   115  */
116 #define LENGTH(X) ((X)->h & LENGTH_MASK)
117 118 /* 119 ** A context for running a raw diff. 120 ** 121 ** The aEdit[] array describes the raw diff. Each triple of integers in 122 ** aEdit[] means: 123 ** 124 ** (1) COPY: Number of lines aFrom and aTo have in common 125 ** (2) DELETE: Number of lines found only in aFrom 126 ** (3) INSERT: Number of lines found only in aTo 127 ** 128 ** The triples repeat until all lines of both aFrom and aTo are accounted 129 ** for. 130 */ 131 typedef struct DContext DContext; 132 struct DContext { 133 int *aEdit; /* Array of copy/delete/insert triples */ 134 int nEdit; /* Number of integers (3x num of triples) in aEdit[] */ 135 int nEditAlloc; /* Space allocated for aEdit[] */ 136 DLine *aFrom; /* File on left side of the diff */ 137 int nFrom; /* Number of lines in aFrom[] */ 138 DLine *aTo; /* File on right side of the diff */ 139 int nTo; /* Number of lines in aTo[] */ 140 }; 141 142 /* 143 ** Return an array of DLine objects containing a pointer to the 144 ** start of each line and a hash of that line. The lower 145 ** bits of the hash store the length of each line. 146 ** 147 ** Trailing whitespace is removed from each line. 2010-08-20: Not any 148 ** more. If trailing whitespace is ignored, the "patch" command gets 149 ** confused by the diff output. Ticket [a9f7b23c2e376af5b0e5b] 150 ** 151 ** Return 0 if the file is binary or contains a line that is 152 ** too long. 153 ** 154 ** Profiling show that in most cases this routine consumes the bulk of 155 ** the CPU time on a diff. 156 */ 157 static DLine *break_into_lines(const char *z, int n, int *pnLine, int ignoreWS){ 158 int nLine, i, j, k, x; 159 unsigned int h, h2; 160 DLine *a; 161 162 /* Count the number of lines. Allocate space to hold 163 ** the returned array. 164 */ 165 for(i=j=0, nLine=1; i<n; i++, j++){ 166 int c = z[i]; 167 if( c==0 ){ 168 return 0; 169 } 170 if( c=='\n' && z[i+1]!=0 ){ 171 nLine++; 172 if( j>LENGTH_MASK ){ 173 return 0; 174 } 175 j = 0; 176 } 177 } 178 if( j>LENGTH_MASK ){ 179 return 0; 180 } 181 a = fossil_malloc( nLine*sizeof(a[0]) ); 182 memset(a, 0, nLine*sizeof(a[0]) ); 183 if( n==0 ){ 184 *pnLine = 0; 185 return a; 186 } 187 188 /* Fill in the array */ 189 for(i=0; i<nLine; i++){ 190 a[i].z = z; 191 for(j=0; z[j] && z[j]!='\n'; j++){} 192 k = j; 193 while( ignoreWS && k>0 && fossil_isspace(z[k-1]) ){ k--; } 194 for(h=0, x=0; x<=k; x++){ 195 h = h ^ (h<<2) ^ z[x]; 196 } 197 a[i].h = h = (h<<LENGTH_MASK_SZ) | k;; 198 h2 = h % nLine; 199 a[i].iNext = a[h2].iHash; 200 a[h2].iHash = i+1; 201 z += j+1; 202 } 203 204 /* Return results */ 205 *pnLine = nLine; 206 return a; 207 } 208 209 /* 210 ** This function attempts to scan each logical line within the blob to 211 ** determine the type of content it appears to contain. The return value 212 ** is a combination of one or more of the LOOK_XXX flags (see above): 213 ** 214 ** !LOOK_BINARY -- The content appears to consist entirely of text; however, 215 ** the encoding may not be UTF-8. 216 ** 217 ** LOOK_BINARY -- The content appears to be binary because it contains one 218 ** or more embedded NUL characters or an extremely long line. 219 ** Since this function does not understand UTF-16, it may 220 ** falsely consider UTF-16 text to be binary. 221 ** 222 ** Additional flags (i.e. those other than the ones included in LOOK_BINARY) 223 ** may be present in the result as well; however, they should not impact the 224 ** determination of text versus binary content. 225 ** 226 ************************************ WARNING ********************************** 227 ** 228 ** This function does not validate that the blob content is properly formed 229 ** UTF-8. It assumes that all code points are the same size. It does not 230 ** validate any code points. It makes no attempt to detect if any [invalid] 231 ** switches between UTF-8 and other encodings occur. 232 ** 233 ** The only code points that this function cares about are the NUL character, 234 ** carriage-return, and line-feed. 235 ** 236 ** This function examines the contents of the blob until one of the flags 237 ** specified in "stopFlags" is set. 238 ** 239 ************************************ WARNING ********************************** 240 */ 241 int looks_like_utf8(const Blob *pContent, int stopFlags){ 242 const char *z = blob_buffer(pContent); 243 unsigned int n = blob_size(pContent); 244 int j, c, flags = LOOK_NONE; /* Assume UTF-8 text, prove otherwise */ 245 246 if( n==0 ) return flags; /* Empty file -> text */ 247 c = *z; 248 if( c==0 ){ 249 flags |= LOOK_NUL; /* NUL character in a file -> binary */ 250 }else if( c=='\r' ){ 251 flags |= LOOK_CR; 252 if( n<=1 || z[1]!='\n' ){ 253 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ 254 } 255 } 256 j = (c!='\n'); 257 if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */ 258 while( !(flags&stopFlags) && --n>0 ){ 259 int c2 = c; 260 c = *++z; ++j; 261 if( c==0 ){ 262 flags |= LOOK_NUL; /* NUL character in a file -> binary */ 263 }else if( c=='\n' ){ 264 flags |= LOOK_LF; 265 if( c2=='\r' ){ 266 flags |= (LOOK_CR | LOOK_CRLF); /* Found LF preceded by CR */ 267 }else{ 268 flags |= LOOK_LONE_LF; 269 } 270 if( j>LENGTH_MASK ){ 271 flags |= LOOK_LONG; /* Very long line -> binary */ 272 } 273 j = 0; 274 }else if( c=='\r' ){ 275 flags |= LOOK_CR; 276 if( n<=1 || z[1]!='\n' ){ 277 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ 278 } 279 } 280 } 281 if( n ){ 282 flags |= LOOK_SHORT; /* Not the whole blob is examined */ 283 } 284 if( j>LENGTH_MASK ){ 285 flags |= LOOK_LONG; /* Very long line -> binary */ 286 } 287 return flags; 288 } 289 290 /* 291 ** Define the type needed to represent a Unicode (UTF-16) character. 292 */ 293 #ifndef WCHAR_T 294 # ifdef _WIN32 295 # define WCHAR_T wchar_t 296 # else 297 # define WCHAR_T unsigned short 298 # endif 299 #endif 300 301 /* 302 ** Maximum length of a line in a text file, in UTF-16 characters. (4096) 303 ** The number of bytes represented by this value cannot exceed LENGTH_MASK 304 ** bytes, because that is the line buffer size used by the diff engine. 305 */ 306 #define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char))) 307 #define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1) 308 309 /* 310 ** This macro is used to swap the byte order of a UTF-16 character in the 311 ** looks_like_utf16() function. 312 */ 313 #define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF)) 314 #define UTF16_SWAP_IF(expr,ch) ((expr) ? UTF16_SWAP((ch)) : (ch)) 315 316 /* 317 ** This function attempts to scan each logical line within the blob to 318 ** determine the type of content it appears to contain. The return value 319 ** is a combination of one or more of the LOOK_XXX flags (see above): 320 ** 321 ** !LOOK_BINARY -- The content appears to consist entirely of text; however, 322 ** the encoding may not be UTF-16. 323 ** 324 ** LOOK_BINARY -- The content appears to be binary because it contains one 325 ** or more embedded NUL characters or an extremely long line. 326 ** Since this function does not understand UTF-8, it may 327 ** falsely consider UTF-8 text to be binary. 328 ** 329 ** Additional flags (i.e. those other than the ones included in LOOK_BINARY) 330 ** may be present in the result as well; however, they should not impact the 331 ** determination of text versus binary content. 332 ** 333 ************************************ WARNING ********************************** 334 ** 335 ** This function does not validate that the blob content is properly formed 336 ** UTF-16. It assumes that all code points are the same size. It does not 337 ** validate any code points. It makes no attempt to detect if any [invalid] 338 ** switches between the UTF-16be and UTF-16le encodings occur. 339 ** 340 ** The only code points that this function cares about are the NUL character, 341 ** carriage-return, and line-feed. 342 ** 343 ** This function examines the contents of the blob until one of the flags 344 ** specified in "stopFlags" is set. 345 ** 346 ************************************ WARNING ********************************** 347 */ 348 int looks_like_utf16(const Blob *pContent, int bReverse, int stopFlags){ 349 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent); 350 unsigned int n = blob_size(pContent); 351 int j, c, flags = LOOK_NONE; /* Assume UTF-16 text, prove otherwise */ 352 353 if( n==0 ) return flags; /* Empty file -> text */ 354 if( n%sizeof(WCHAR_T) ){ 355 flags |= LOOK_ODD; /* Odd number of bytes -> binary (UTF-8?) */ 356 if( n<sizeof(WCHAR_T) ) return flags; /* One byte -> binary (UTF-8?) */ 357 } 358 c = *z; 359 if( bReverse ){ 360 c = UTF16_SWAP(c); 361 } 362 if( c==0 ){ 363 flags |= LOOK_NUL; /* NUL character in a file -> binary */ 364 }else if( c=='\r' ){ 365 flags |= LOOK_CR; 366 if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ 367 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ 368 } 369 } 370 j = (c!='\n'); 371 if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */ 372 while( 1 ){ 373 int c2 = c; 374 n -= sizeof(WCHAR_T); 375 if( (flags&stopFlags) || n<sizeof(WCHAR_T) ) break; 376 c = *++z; 377 if( bReverse ){ 378 c = UTF16_SWAP(c); 379 } 380 ++j; 381 if( c==0 ){ 382 flags |= LOOK_NUL; /* NUL character in a file -> binary */ 383 }else if( c=='\n' ){ 384 flags |= LOOK_LF; 385 if( c2=='\r' ){ 386 flags |= (LOOK_CR | LOOK_CRLF); /* Found LF preceded by CR */ 387 }else{ 388 flags |= LOOK_LONE_LF; 389 } 390 if( j>UTF16_LENGTH_MASK ){ 391 flags |= LOOK_LONG; /* Very long line -> binary */ 392 } 393 j = 0; 394 }else if( c=='\r' ){ 395 flags |= LOOK_CR; 396 if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ 397 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ 398 } 399 } 400 } 401 if( n ){ 402 flags |= LOOK_SHORT; /* Not the whole blob is examined */ 403 } 404 if( j>UTF16_LENGTH_MASK ){ 405 flags |= LOOK_LONG; /* Very long line -> binary */ 406 } 407 return flags; 408 } 409 410 /* 411 ** This function returns an array of bytes representing the byte-order-mark 412 ** for UTF-8. 413 */ 414 const unsigned char *get_utf8_bom(int *pnByte){ 415 static const unsigned char bom[] = { 416 0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00 417 }; 418 if( pnByte ) *pnByte = 3; 419 return bom; 420 } 421 422 /* 423 ** This function returns non-zero if the blob starts with a UTF-8 424 ** byte-order-mark (BOM). 425 */ 426 int starts_with_utf8_bom(const Blob *pContent, int *pnByte){ 427 const char *z = blob_buffer(pContent); 428 int bomSize = 0; 429 const unsigned char *bom = get_utf8_bom(&bomSize); 430 431 if( pnByte ) *pnByte = bomSize; 432 if( blob_size(pContent)<bomSize ) return 0; 433 return memcmp(z, bom, bomSize)==0; 434 } 435 436 /* 437 ** This function returns non-zero if the blob starts with a UTF-16 438 ** byte-order-mark (BOM), either in the endianness of the machine 439 ** or in reversed byte order. The UTF-32 BOM is ruled out by checking 440 ** if the UTF-16 BOM is not immediately followed by (utf16) 0. 441 ** pnByte and pbReverse are only set when the function returns 1. 442 */ 443 int starts_with_utf16_bom( 444 const Blob *pContent, /* IN: Blob content to perform BOM detection on. */ 445 int *pnByte, /* OUT: The number of bytes used for the BOM. */ 446 int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */ 447 ){ 448 const unsigned short *z = (unsigned short *)blob_buffer(pContent); 449 int bomSize = sizeof(unsigned short); 450 int size = blob_size(pContent); 451 452 if( size<bomSize ) return 0; /* No: cannot read BOM. */ 453 if( size>=(2*bomSize) && z[1]==0 ) return 0; /* No: possible UTF-32. */ 454 if( z[0]==0xfffe ){ 455 if( pbReverse ) *pbReverse = 1; 456 }else if( z[0]==0xfeff ){ 457 if( pbReverse ) *pbReverse = 0; 458 }else{ 459 return 0; /* No: UTF-16 byte-order-mark not found. */ 460 } 461 if( pnByte ) *pnByte = bomSize; 462 return 1; /* Yes. */ 463 } 464 465 /* 466 ** Returns non-zero if the specified content could be valid UTF-16. 467 */ 468 int could_be_utf16(const Blob *pContent, int *pbReverse){ 469 return (blob_size(pContent) % sizeof(WCHAR_T) == 0) ? 470 starts_with_utf16_bom(pContent, 0, pbReverse) : 0; 471 } 472 473 /* 474 ** Return true if two DLine elements are identical. 475 */ 476 static int same_dline(DLine *pA, DLine *pB){ 477 return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; 478 } 479 480 /* 481 ** Return true if the regular expression *pRe matches any of the 482 ** N dlines 483 */ 484 static int re_dline_match( 485 ReCompiled *pRe, /* The regular expression to be matched */ 486 DLine *aDLine, /* First of N DLines to compare against */ 487 int N /* Number of DLines to check */ 488 ){ 489 while( N-- ){ 490 if( re_match(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){ 491 return 1; 492 } 493 aDLine++; 494 } 495 return 0; 496 } 497 498 /* 499 ** Append a single line of context-diff output to pOut. 500 */ 501 static void appendDiffLine( 502 Blob *pOut, /* Where to write the line of output */ 503 char cPrefix, /* One of " ", "+", or "-" */ 504 DLine *pLine, /* The line to be output */ 505 int html, /* True if generating HTML. False for plain text */ 506 ReCompiled *pRe /* Colorize only if line matches this Regex */ 507 ){ 508 blob_append(pOut, &cPrefix, 1); 509 if( html ){ 510 char *zHtml; 511 if( pRe && re_dline_match(pRe, pLine, 1)==0 ){ 512 cPrefix = ' '; 513 }else if( cPrefix=='+' ){ 514 blob_append(pOut, "<span class=\"diffadd\">", -1); 515 }else if( cPrefix=='-' ){ 516 blob_append(pOut, "<span class=\"diffrm\">", -1); 517 } 518 zHtml = htmlize(pLine->z, (pLine->h & LENGTH_MASK)); 519 blob_append(pOut, zHtml, -1); 520 fossil_free(zHtml); 521 if( cPrefix!=' ' ){ 522 blob_append(pOut, "</span>", -1); 523 } 524 }else{ 525 blob_append(pOut, pLine->z, pLine->h & LENGTH_MASK); 526 } 527 blob_append(pOut, "\n", 1); 528 } 529 530 /* 531 ** Add two line numbers to the beginning of an output line for a context 532 ** diff. One or the other of the two numbers might be zero, which means 533 ** to leave that number field blank. The "html" parameter means to format 534 ** the output for HTML. 535 */ 536 static void appendDiffLineno(Blob *pOut, int lnA, int lnB, int html){ 537 if( html ) blob_append(pOut, "<span class=\"diffln\">", -1); 538 if( lnA>0 ){ 539 blob_appendf(pOut, "%6d ", lnA); 540 }else{ 541 blob_append(pOut, " ", 7); 542 } 543 if( lnB>0 ){ 544 blob_appendf(pOut, "%6d ", lnB); 545 }else{ 546 blob_append(pOut, " ", 8); 547 } 548 if( html ) blob_append(pOut, "</span>", -1); 549 } 550 551 /* 552 ** Given a raw diff p[] in which the p->aEdit[] array has been filled 553 ** in, compute a context diff into pOut. 554 */ 555 static void contextDiff( 556 DContext *p, /* The difference */ 557 Blob *pOut, /* Output a context diff to here */ 558 ReCompiled *pRe, /* Only show changes that match this regex */ 559 u64 diffFlags /* Flags controlling the diff format */ 560 ){ 561 DLine *A; /* Left side of the diff */ 562 DLine *B; /* Right side of the diff */ 563 int a = 0; /* Index of next line in A[] */ 564 int b = 0; /* Index of next line in B[] */ 565 int *R; /* Array of COPY/DELETE/INSERT triples */ 566 int r; /* Index into R[] */ 567 int nr; /* Number of COPY/DELETE/INSERT triples to process */ 568 int mxr; /* Maximum value for r */ 569 int na, nb; /* Number of lines shown from A and B */ 570 int i, j; /* Loop counters */ 571 int m; /* Number of lines to output */ 572 int skip; /* Number of lines to skip */ 573 int nChunk = 0; /* Number of diff chunks seen so far */ 574 int nContext; /* Number of lines of context */ 575 int showLn; /* Show line numbers */ 576 int html; /* Render as HTML */ 577 int showDivider = 0; /* True to show the divider between diff blocks */ 578 579 nContext = diff_context_lines(diffFlags); 580 showLn = (diffFlags & DIFF_LINENO)!=0; 581 html = (diffFlags & DIFF_HTML)!=0; 582 A = p->aFrom; 583 B = p->aTo; 584 R = p->aEdit; 585 mxr = p->nEdit; 586 while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } 587 for(r=0; r<mxr; r += 3*nr){ 588 /* Figure out how many triples to show in a single block */ 589 for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){} 590 /* printf("r=%d nr=%d\n", r, nr); */ 591 592 /* If there is a regex, skip this block (generate no diff output) 593 ** if the regex matches or does not match both insert and delete. 594 ** Only display the block if one side matches but the other side does 595 ** not. 596 */ 597 if( pRe ){ 598 int hideBlock = 1; 599 int xa = a, xb = b; 600 for(i=0; hideBlock && i<nr; i++){ 601 int c1, c2; 602 xa += R[r+i*3]; 603 xb += R[r+i*3]; 604 c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]); 605 c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]); 606 hideBlock = c1==c2; 607 xa += R[r+i*3+1]; 608 xb += R[r+i*3+2]; 609 } 610 if( hideBlock ){ 611 a = xa; 612 b = xb; 613 continue; 614 } 615 } 616 617 /* For the current block comprising nr triples, figure out 618 ** how many lines of A and B are to be displayed 619 */ 620 if( R[r]>nContext ){ 621 na = nb = nContext; 622 skip = R[r] - nContext; 623 }else{ 624 na = nb = R[r]; 625 skip = 0; 626 } 627 for(i=0; i<nr; i++){ 628 na += R[r+i*3+1]; 629 nb += R[r+i*3+2]; 630 } 631 if( R[r+nr*3]>nContext ){ 632 na += nContext; 633 nb += nContext; 634 }else{ 635 na += R[r+nr*3]; 636 nb += R[r+nr*3]; 637 } 638 for(i=1; i<nr; i++){ 639 na += R[r+i*3]; 640 nb += R[r+i*3]; 641 } 642 643 /* Show the header for this block, or if we are doing a modified 644 ** context diff that contains line numbers, show the separator from 645 ** the previous block. 646 */ 647 nChunk++; 648 if( showLn ){ 649 if( !showDivider ){ 650 /* Do not show a top divider */ 651 showDivider = 1; 652 }else if( html ){ 653 blob_appendf(pOut, "<span class=\"diffhr\">%.80c</span>\n", '.'); 654 blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk); 655 }else{ 656 blob_appendf(pOut, "%.80c\n", '.'); 657 } 658 }else{ 659 if( html ) blob_appendf(pOut, "<span class=\"diffln\">"); 660 /* 661 * If the patch changes an empty file or results in an empty file, 662 * the block header must use 0,0 as position indicator and not 1,0. 663 * Otherwise, patch would be confused and may reject the diff. 664 */ 665 blob_appendf(pOut,"@@ -%d,%d +%d,%d @@", 666 na ? a+skip+1 : 0, na, 667 nb ? b+skip+1 : 0, nb); 668 if( html ) blob_appendf(pOut, "</span>"); 669 blob_append(pOut, "\n", 1); 670 } 671 672 /* Show the initial common area */ 673 a += skip; 674 b += skip; 675 m = R[r] - skip; 676 for(j=0; j<m; j++){ 677 if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html); 678 appendDiffLine(pOut, ' ', &A[a+j], html, 0); 679 } 680 a += m; 681 b += m; 682 683 /* Show the differences */ 684 for(i=0; i<nr; i++){ 685 m = R[r+i*3+1]; 686 for(j=0; j<m; j++){ 687 if( showLn ) appendDiffLineno(pOut, a+j+1, 0, html); 688 appendDiffLine(pOut, '-', &A[a+j], html, pRe); 689 } 690 a += m; 691 m = R[r+i*3+2]; 692 for(j=0; j<m; j++){ 693 if( showLn ) appendDiffLineno(pOut, 0, b+j+1, html); 694 appendDiffLine(pOut, '+', &B[b+j], html, pRe); 695 } 696 b += m; 697 if( i<nr-1 ){ 698 m = R[r+i*3+3]; 699 for(j=0; j<m; j++){ 700 if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html); 701 appendDiffLine(pOut, ' ', &B[b+j], html, 0); 702 } 703 b += m; 704 a += m; 705 } 706 } 707 708 /* Show the final common area */ 709 assert( nr==i ); 710 m = R[r+nr*3]; 711 if( m>nContext ) m = nContext; 712 for(j=0; j<m; j++){ 713 if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html); 714 appendDiffLine(pOut, ' ', &B[b+j], html, 0); 715 } 716 } 717 } 718 719 /* 720 ** Status of a single output line 721 */ 722 typedef struct SbsLine SbsLine; 723 struct SbsLine { 724 char *zLine; /* The output line under construction */ 725 int n; /* Index of next unused slot in the zLine[] */ 726 int width; /* Maximum width of a column in the output */ 727 unsigned char escHtml; /* True to escape html characters */ 728 int iStart; /* Write zStart prior to character iStart */ 729 const char *zStart; /* A <span> tag */ 730 int iEnd; /* Write </span> prior to character iEnd */ 731 int iStart2; /* Write zStart2 prior to character iStart2 */ 732 const char *zStart2; /* A <span> tag */ 733 int iEnd2; /* Write </span> prior to character iEnd2 */ 734 ReCompiled *pRe; /* Only colorize matching lines, if not NULL */ 735 }; 736 737 /* 738 ** Flags for sbsWriteText() 739 */ 740 #define SBS_NEWLINE 0x0001 /* End with \n\000 */ 741 #define SBS_PAD 0x0002 /* Pad output to width spaces */ 742 743 /* 744 ** Write up to width characters of pLine into p->zLine[]. Translate tabs into 745 ** spaces. Add a newline if SBS_NEWLINE is set. Translate HTML characters 746 ** if SBS_HTML is set. Pad the rendering out width bytes if SBS_PAD is set. 747 ** 748 ** This comment contains multibyte unicode characters (ü, Æ, ð) in order 749 ** to test the ability of the diff code to handle such characters. 750 */ 751 static void sbsWriteText(SbsLine *p, DLine *pLine, unsigned flags){ 752 int n = pLine->h & LENGTH_MASK; 753 int i; /* Number of input characters consumed */ 754 int j; /* Number of output characters generated */ 755 int k; /* Cursor position */ 756 int needEndSpan = 0; 757 const char *zIn = pLine->z; 758 char *z = &p->zLine[p->n]; 759 int w = p->width; 760 int colorize = p->escHtml; 761 if( colorize && p->pRe && re_dline_match(p->pRe, pLine, 1)==0 ){ 762 colorize = 0; 763 } 764 for(i=j=k=0; k<w && i<n; i++, k++){ 765 char c = zIn[i]; 766 if( colorize ){ 767 if( i==p->iStart ){ 768 int x = strlen(p->zStart); 769 memcpy(z+j, p->zStart, x); 770 j += x; 771 needEndSpan = 1; 772 if( p->iStart2 ){ 773 p->iStart = p->iStart2; 774 p->zStart = p->zStart2; 775 p->iStart2 = 0; 776 } 777 }else if( i==p->iEnd ){ 778 memcpy(z+j, "</span>", 7); 779 j += 7; 780 needEndSpan = 0; 781 if( p->iEnd2 ){ 782 p->iEnd = p->iEnd2; 783 p->iEnd2 = 0; 784 } 785 } 786 } 787 if( c=='\t' ){ 788 z[j++] = ' '; 789 while( (k&7)!=7 && k<w ){ z[j++] = ' '; k++; } 790 }else if( c=='\r' || c=='\f' ){ 791 z[j++] = ' '; 792 }else if( c=='<' && p->escHtml ){ 793 memcpy(&z[j], "&lt;", 4); 794 j += 4; 795 }else if( c=='&' && p->escHtml ){ 796 memcpy(&z[j], "&amp;", 5); 797 j += 5; 798 }else if( c=='>' && p->escHtml ){ 799 memcpy(&z[j], "&gt;", 4); 800 j += 4; 801 }else if( c=='"' && p->escHtml ){ 802 memcpy(&z[j], "&quot;", 6); 803 j += 6; 804 }else{ 805 z[j++] = c; 806 if( (c&0xc0)==0x80 ) k--; 807 } 808 } 809 if( needEndSpan ){ 810 memcpy(&z[j], "</span>", 7); 811 j += 7; 812 } 813 if( (flags & SBS_PAD)!=0 ){ 814 while( k<w ){ k++; z[j++] = ' '; } 815 } 816 if( flags & SBS_NEWLINE ){ 817 z[j++] = '\n'; 818 } 819 p->n += j; 820 } 821 822 /* 823 ** Append a string to an SbSLine without coding, interpretation, or padding. 824 */ 825 static void sbsWrite(SbsLine *p, const char *zIn, int nIn){ 826 memcpy(p->zLine+p->n, zIn, nIn); 827 p->n += nIn; 828 } 829 830 /* 831 ** Append n spaces to the string. 832 */ 833 static void sbsWriteSpace(SbsLine *p, int n){ 834 while( n-- ) p->zLine[p->n++] = ' '; 835 } 836 837 /* 838 ** Append a string to the output only if we are rendering HTML. 839 */ 840 static void sbsWriteHtml(SbsLine *p, const char *zIn){ 841 if( p->escHtml ) sbsWrite(p, zIn, strlen(zIn)); 842 } 843 844 /* 845 ** Write a 6-digit line number followed by a single space onto the line. 846 */ 847 static void sbsWriteLineno(SbsLine *p, int ln){ 848 sbsWriteHtml(p, "<span class=\"diffln\">"); 849 sqlite3_snprintf(7, &p->zLine[p->n], "%5d ", ln+1); 850 p->n += 6; 851 sbsWriteHtml(p, "</span>"); 852 p->zLine[p->n++] = ' '; 853 } 854 855 /* 856 ** The two text segments zLeft and zRight are known to be different on 857 ** both ends, but they might have a common segment in the middle. If 858 ** they do not have a common segment, return 0. If they do have a large 859 ** common segment, return 1 and before doing so set: 860 ** 861 ** aLCS[0] = start of the common segment in zLeft 862 ** aLCS[1] = end of the common segment in zLeft 863 ** aLCS[2] = start of the common segment in zLeft 864 ** aLCS[3] = end of the common segment in zLeft 865 ** 866 ** This computation is for display purposes only and does not have to be 867 ** optimal or exact. 868 */ 869 static int textLCS( 870 const char *zLeft, int nA, /* String on the left */ 871 const char *zRight, int nB, /* String on the right */ 872 int *aLCS /* Identify bounds of LCS here */ 873 ){ 874 const unsigned char *zA = (const unsigned char*)zLeft; /* left string */ 875 const unsigned char *zB = (const unsigned char*)zRight; /* right string */ 876 int nt; /* Number of target points */ 877 int ti[3]; /* Index for start of each 4-byte target */ 878 unsigned int target[3]; /* 4-byte alignment targets */ 879 unsigned int probe; /* probe to compare against target */ 880 int iAS, iAE, iBS, iBE; /* Range of common segment */ 881 int i, j; /* Loop counters */ 882 int rc = 0; /* Result code. 1 for success */ 883 884 if( nA<6 || nB<6 ) return 0; 885 memset(aLCS, 0, sizeof(int)*4); 886 ti[0] = i = nB/2-2; 887 target[0] = (zB[i]<<24) | (zB[i+1]<<16) | (zB[i+2]<<8) | zB[i+3]; 888 probe = 0; 889 if( nB<16 ){ 890 nt = 1; 891 }else{ 892 ti[1] = i = nB/4-2; 893 target[1] = (zB[i]<<24) | (zB[i+1]<<16) | (zB[i+2]<<8) | zB[i+3]; 894 ti[2] = i = (nB*3)/4-2; 895 target[2] = (zB[i]<<24) | (zB[i+1]<<16) | (zB[i+2]<<8) | zB[i+3]; 896 nt = 3; 897 } 898 probe = (zA[0]<<16) | (zA[1]<<8) | zA[2]; 899 for(i=3; i<nA; i++){ 900 probe = (probe<<8) | zA[i]; 901 for(j=0; j<nt; j++){ 902 if( probe==target[j] ){ 903 iAS = i-3; 904 iAE = i+1; 905 iBS = ti[j]; 906 iBE = ti[j]+4; 907 while( iAE<nA && iBE<nB && zA[iAE]==zB[iBE] ){ iAE++; iBE++; } 908 while( iAS>0 && iBS>0 && zA[iAS-1]==zB[iBS-1] ){ iAS--; iBS--; } 909 if( iAE-iAS > aLCS[1] - aLCS[0] ){ 910 aLCS[0] = iAS; 911 aLCS[1] = iAE; 912 aLCS[2] = iBS; 913 aLCS[3] = iBE; 914 rc = 1; 915 } 916 } 917 } 918 } 919 return rc; 920 } 921 922 /* 923 ** Try to shift iStart as far as possible to the left. 924 */ 925 static void sbsShiftLeft(SbsLine *p, const char *z){ 926 int i, j; 927 while( (i=p->iStart)>0 && z[i-1]==z[i] ){ 928 for(j=i+1; j<p->iEnd && z[j-1]==z[j]; j++){} 929 if( j<p->iEnd ) break; 930 p->iStart--; 931 p->iEnd--; 932 } 933 } 934 935 /* 936 ** Simplify iStart and iStart2: 937 ** 938 ** * If iStart is a null-change then move iStart2 into iStart 939 ** * Make sure any null-changes are in canonoical form. 940 ** * Make sure all changes are at character boundaries for 941 ** multi-byte characters. 942 */ 943 static void sbsSimplifyLine(SbsLine *p, const char *z){ 944 if( p->iStart2==p->iEnd2 ){ 945 p->iStart2 = p->iEnd2 = 0; 946 }else if( p->iStart2 ){ 947 while( p->iStart2>0 && (z[p->iStart2]&0xc0)==0x80 ) p->iStart2--; 948 while( (z[p->iEnd2]&0xc0)==0x80 ) p->iEnd2++; 949 } 950 if( p->iStart==p->iEnd ){ 951 p->iStart = p->iStart2; 952 p->iEnd = p->iEnd2; 953 p->zStart = p->zStart2; 954 p->iStart2 = 0; 955 p->iEnd2 = 0; 956 } 957 if( p->iStart==p->iEnd ){ 958 p->iStart = p->iEnd = -1; 959 }else if( p->iStart>0 ){ 960 while( p->iStart>0 && (z[p->iStart]&0xc0)==0x80 ) p->iStart--; 961 while( (z[p->iEnd]&0xc0)==0x80 ) p->iEnd++; 962 } 963 } 964 965 /* 966 ** Write out lines that have been edited. Adjust the highlight to cover 967 ** only those parts of the line that actually changed. 968 */ 969 static void sbsWriteLineChange( 970 SbsLine *p, /* The SBS output line */ 971 DLine *pLeft, /* Left line of the change */ 972 int lnLeft, /* Line number for the left line */ 973 DLine *pRight, /* Right line of the change */ 974 int lnRight /* Line number of the right line */ 975 ){ 976 int nLeft; /* Length of left line in bytes */ 977 int nRight; /* Length of right line in bytes */ 978 int nShort; /* Shortest of left and right */ 979 int nPrefix; /* Length of common prefix */ 980 int nSuffix; /* Length of common suffix */ 981 const char *zLeft; /* Text of the left line */ 982 const char *zRight; /* Text of the right line */ 983 int nLeftDiff; /* nLeft - nPrefix - nSuffix */ 984 int nRightDiff; /* nRight - nPrefix - nSuffix */ 985 int aLCS[4]; /* Bounds of common middle segment */ 986 static const char zClassRm[] = "<span class=\"diffrm\">"; 987 static const char zClassAdd[] = "<span class=\"diffadd\">"; 988 static const char zClassChng[] = "<span class=\"diffchng\">"; 989 990 nLeft = pLeft->h & LENGTH_MASK; 991 zLeft = pLeft->z; 992 nRight = pRight->h & LENGTH_MASK; 993 zRight = pRight->z; 994 nShort = nLeft<nRight ? nLeft : nRight; 995 996 nPrefix = 0; 997 while( nPrefix<nShort && zLeft[nPrefix]==zRight[nPrefix] ){ 998 nPrefix++; 999 } 1000 if( nPrefix<nShort ){ 1001 while( nPrefix>0 && (zLeft[nPrefix]&0xc0)==0x80 ) nPrefix--; 1002 } 1003 nSuffix = 0; 1004 if( nPrefix<nShort ){ 1005 while( nSuffix<nShort && zLeft[nLeft-nSuffix-1]==zRight[nRight-nSuffix-1] ){ 1006 nSuffix++; 1007 } 1008 if( nSuffix<nShort ){ 1009 while( nSuffix>0 && (zLeft[nLeft-nSuffix]&0xc0)==0x80 ) nSuffix--; 1010 } 1011 if( nSuffix==nLeft || nSuffix==nRight ) nPrefix = 0; 1012 } 1013 if( nPrefix+nSuffix > nShort ) nPrefix = nShort - nSuffix; 1014 1015 1016 /* A single chunk of text inserted on the right */ 1017 if( nPrefix+nSuffix==nLeft ){ 1018 sbsWriteLineno(p, lnLeft); 1019 p->iStart2 = p->iEnd2 = 0; 1020 p->iStart = p->iEnd = -1; 1021 sbsWriteText(p, pLeft, SBS_PAD); 1022 if( nLeft==nRight && zLeft[nLeft]==zRight[nRight] ){ 1023 sbsWrite(p, " ", 3); 1024 }else{ 1025 sbsWrite(p, " | ", 3); 1026 } 1027 sbsWriteLineno(p, lnRight); 1028 p->iStart = nPrefix; 1029 p->iEnd = nRight - nSuffix; 1030 p->zStart = zClassAdd; 1031 sbsWriteText(p, pRight, SBS_NEWLINE); 1032 return; 1033 } 1034 1035 /* A single chunk of text deleted from the left */ 1036 if( nPrefix+nSuffix==nRight ){ 1037 /* Text deleted from the left */ 1038 sbsWriteLineno(p, lnLeft); 1039 p->iStart2 = p->iEnd2 = 0; 1040 p->iStart = nPrefix; 1041 p->iEnd = nLeft - nSuffix; 1042 p->zStart = zClassRm; 1043 sbsWriteText(p, pLeft, SBS_PAD); 1044 sbsWrite(p, " | ", 3); 1045 sbsWriteLineno(p, lnRight); 1046 p->iStart = p->iEnd = -1; 1047 sbsWriteText(p, pRight, SBS_NEWLINE); 1048 return; 1049 } 1050 1051 /* At this point we know that there is a chunk of text that has 1052 ** changed between the left and the right. Check to see if there 1053 ** is a large unchanged section in the middle of that changed block. 1054 */ 1055 nLeftDiff = nLeft - nSuffix - nPrefix; 1056 nRightDiff = nRight - nSuffix - nPrefix; 1057 if( p->escHtml 1058 && nLeftDiff >= 6 1059 && nRightDiff >= 6 1060 && textLCS(&zLeft[nPrefix], nLeftDiff, &zRight[nPrefix], nRightDiff, aLCS) 1061 ){ 1062 sbsWriteLineno(p, lnLeft); 1063 p->iStart = nPrefix; 1064 p->iEnd = nPrefix + aLCS[0]; 1065 if( aLCS[2]==0 ){ 1066 sbsShiftLeft(p, pLeft->z); 1067 p->zStart = zClassRm; 1068 }else{ 1069 p->zStart = zClassChng; 1070 } 1071 p->iStart2 = nPrefix + aLCS[1]; 1072 p->iEnd2 = nLeft - nSuffix; 1073 p->zStart2 = aLCS[3]==nRightDiff ? zClassRm : zClassChng; 1074 sbsSimplifyLine(p, zLeft+nPrefix); 1075 sbsWriteText(p, pLeft, SBS_PAD); 1076 sbsWrite(p, " | ", 3); 1077 sbsWriteLineno(p, lnRight); 1078 p->iStart = nPrefix; 1079 p->iEnd = nPrefix + aLCS[2]; 1080 if( aLCS[0]==0 ){ 1081 sbsShiftLeft(p, pRight->z); 1082 p->zStart = zClassAdd; 1083 }else{ 1084 p->zStart = zClassChng; 1085 } 1086 p->iStart2 = nPrefix + aLCS[3]; 1087 p->iEnd2 = nRight - nSuffix; 1088 p->zStart2 = aLCS[1]==nLeftDiff ? zClassAdd : zClassChng; 1089 sbsSimplifyLine(p, zRight+nPrefix); 1090 sbsWriteText(p, pRight, SBS_NEWLINE); 1091 return; 1092 } 1093 1094 /* If all else fails, show a single big change between left and right */ 1095 sbsWriteLineno(p, lnLeft); 1096 p->iStart2 = p->iEnd2 = 0; 1097 p->iStart = nPrefix; 1098 p->iEnd = nLeft - nSuffix; 1099 p->zStart = zClassChng; 1100 sbsWriteText(p, pLeft, SBS_PAD); 1101 sbsWrite(p, " | ", 3); 1102 sbsWriteLineno(p, lnRight); 1103 p->iEnd = nRight - nSuffix; 1104 sbsWriteText(p, pRight, SBS_NEWLINE); 1105 } 1106 1107 /* 1108 ** Minimum of two values 1109 */ 1110 static int minInt(int a, int b){ return a<b ? a : b; } 1111 1112 /* 1113 ** Return the number between 0 and 100 that is smaller the closer pA and 1114 ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are 1115 ** completely different. 1116 ** 1117 ** The current algorithm is as follows: 1118 ** 1119 ** (1) Remove leading and trailing whitespace. 1120 ** (2) Truncate both strings to at most 250 characters 1121 ** (3) Find the length of the longest common subsequence 1122 ** (4) Longer common subsequences yield lower scores. 1123 */ 1124 static int match_dline(DLine *pA, DLine *pB){ 1125 const char *zA; /* Left string */ 1126 const char *zB; /* right string */ 1127 int nA; /* Bytes in zA[] */ 1128 int nB; /* Bytes in zB[] */ 1129 int avg; /* Average length of A and B */ 1130 int i, j, k; /* Loop counters */ 1131 int best = 0; /* Longest match found so far */ 1132 int score; /* Final score. 0..100 */ 1133 unsigned char c; /* Character being examined */ 1134 unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */ 1135 unsigned char aNext[252]; /* aNext[i] = index in zB[] of next zB[i] char */ 1136 1137 zA = pA->z; 1138 zB = pB->z; 1139 nA = pA->h & LENGTH_MASK; 1140 nB = pB->h & LENGTH_MASK; 1141 while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; } 1142 while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; } 1143 while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; } 1144 while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; } 1145 if( nA>250 ) nA = 250; 1146 if( nB>250 ) nB = 250; 1147 avg = (nA+nB)/2; 1148 if( avg==0 ) return 0; 1149 if( nA==nB && memcmp(zA, zB, nA)==0 ) return 0; 1150 memset(aFirst, 0, sizeof(aFirst)); 1151 zA--; zB--; /* Make both zA[] and zB[] 1-indexed */ 1152 for(i=nB; i>0; i--){ 1153 c = (unsigned char)zB[i]; 1154 aNext[i] = aFirst[c]; 1155 aFirst[c] = i; 1156 } 1157 best = 0; 1158 for(i=1; i<=nA-best; i++){ 1159 c = (unsigned char)zA[i]; 1160 for(j=aFirst[c]; j>0 && j<nB-best; j = aNext[j]){ 1161 int limit = minInt(nA-i, nB-j); 1162 for(k=1; k<=limit && zA[k+i]==zB[k+j]; k++){} 1163 if( k>best ) best = k; 1164 } 1165 } 1166 score = (best>avg) ? 0 : (avg - best)*100/avg; 1167 1168 #if 0 1169 fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n", 1170 nA, zA+1, nB, zB+1, best, avg, score); 1171 #endif 1172 1173 /* Return the result */ 1174 return score; 1175 } 1176 1177 /* 1178 ** There is a change block in which nLeft lines of text on the left are 1179 ** converted into nRight lines of text on the right. This routine computes 1180 ** how the lines on the left line up with the lines on the right. 1181 ** 1182 ** The return value is a buffer of unsigned characters, obtained from 1183 ** fossil_malloc(). (The caller needs to free the return value using 1184 ** fossil_free().) Entries in the returned array have values as follows: 1185 ** 1186 ** 1. Delete the next line of pLeft. 1187 ** 2. Insert the next line of pRight. 1188 ** 3. The next line of pLeft changes into the next line of pRight. 1189 ** 4. Delete one line from pLeft and add one line to pRight. 1190 ** 1191 ** Values larger than three indicate better matches. 1192 ** 1193 ** The length of the returned array will be just large enough to cause 1194 ** all elements of pLeft and pRight to be consumed. 1195 ** 1196 ** Algorithm: Wagner's minimum edit-distance algorithm, modified by 1197 ** adding a cost to each match based on how well the two rows match 1198 ** each other. Insertion and deletion costs are 50. Match costs 1199 ** are between 0 and 100 where 0 is a perfect match 100 is a complete 1200 ** mismatch. 1201 */ 1202 static unsigned char *sbsAlignment( 1203 DLine *aLeft, int nLeft, /* Text on the left */ 1204 DLine *aRight, int nRight /* Text on the right */ 1205 ){ 1206 int i, j, k; /* Loop counters */ 1207 int *a; /* One row of the Wagner matrix */ 1208 int *pToFree; /* Space that needs to be freed */ 1209 unsigned char *aM; /* Wagner result matrix */ 1210 int nMatch, iMatch; /* Number of matching lines and match score */ 1211 int mnLen; /* MIN(nLeft, nRight) */ 1212 int mxLen; /* MAX(nLeft, nRight) */ 1213 int aBuf[100]; /* Stack space for a[] if nRight not to big */ 1214 1215 aM = fossil_malloc( (nLeft+1)*(nRight+1) ); 1216 if( nLeft==0 ){ 1217 memset(aM, 2, nRight); 1218 return aM; 1219 } 1220 if( nRight==0 ){ 1221 memset(aM, 1, nLeft); 1222 return aM; 1223 } 1224 1225 /* This algorithm is O(N**2). So if N is too big, bail out with a 1226 ** simple (but stupid and ugly) result that doesn't take too long. */ 1227 mnLen = nLeft<nRight ? nLeft : nRight; 1228 if( nLeft*nRight>100000 ){ 1229 memset(aM, 4, mnLen); 1230 if( nLeft>mnLen ) memset(aM+mnLen, 1, nLeft-mnLen); 1231 if( nRight>mnLen ) memset(aM+mnLen, 2, nRight-mnLen); 1232 return aM; 1233 } 1234 1235 if( nRight < (sizeof(aBuf)/sizeof(aBuf[0]))-1 ){ 1236 pToFree = 0; 1237 a = aBuf; 1238 }else{ 1239 a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) ); 1240 } 1241 1242 /* Compute the best alignment */ 1243 for(i=0; i<=nRight; i++){ 1244 aM[i] = 2; 1245 a[i] = i*50; 1246 } 1247 aM[0] = 0; 1248 for(j=1; j<=nLeft; j++){ 1249 int p = a[0]; 1250 a[0] = p+50; 1251 aM[j*(nRight+1)] = 1; 1252 for(i=1; i<=nRight; i++){ 1253 int m = a[i-1]+50; 1254 int d = 2; 1255 if( m>a[i]+50 ){ 1256 m = a[i]+50; 1257 d = 1; 1258 } 1259 if( m>p ){ 1260 int score = match_dline(&aLeft[j-1], &aRight[i-1]); 1261 if( (score<=63 || (i<j+1 && i>j-1)) && m>p+score ){ 1262 m = p+score; 1263 d = 3 | score*4; 1264 } 1265 } 1266 p = a[i]; 1267 a[i] = m; 1268 aM[j*(nRight+1)+i] = d; 1269 } 1270 } 1271 1272 /* Compute the lowest-cost path back through the matrix */ 1273 i = nRight; 1274 j = nLeft; 1275 k = (nRight+1)*(nLeft+1)-1; 1276 nMatch = iMatch = 0; 1277 while( i+j>0 ){ 1278 unsigned char c = aM[k]; 1279 if( c>=3 ){ 1280 assert( i>0 && j>0 ); 1281 i--; 1282 j--; 1283 nMatch++; 1284 iMatch += (c>>2); 1285 aM[k] = 3; 1286 }else if( c==2 ){ 1287 assert( i>0 ); 1288 i--; 1289 }else{ 1290 assert( j>0 ); 1291 j--; 1292 } 1293 k--; 1294 aM[k] = aM[j*(nRight+1)+i]; 1295 } 1296 k++; 1297 i = (nRight+1)*(nLeft+1) - k; 1298 memmove(aM, &aM[k], i); 1299 1300 /* If: 1301 ** (1) the alignment is more than 25% longer than the longest side, and 1302 ** (2) the average match cost exceeds 15 1303 ** Then this is probably an alignment that will be difficult for humans 1304 ** to read. So instead, just show all of the right side inserted followed 1305 ** by all of the left side deleted. 1306 ** 1307 ** The coefficients for conditions (1) and (2) above are determined by 1308 ** experimentation. 1309 */ 1310 mxLen = nLeft>nRight ? nLeft : nRight; 1311 if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){ 1312 memset(aM, 4, mnLen); 1313 if( nLeft>mnLen ) memset(aM+mnLen, 1, nLeft-mnLen); 1314 if( nRight>mnLen ) memset(aM+mnLen, 2, nRight-mnLen); 1315 } 1316 1317 /* Return the result */ 1318 fossil_free(pToFree); 1319 return aM; 1320 } 1321 1322 /* 1323 ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a 1324 ** pair of adjacent differences. Return true if the gap between these 1325 ** two differences is so small that they should be rendered as a single 1326 ** edit. 1327 */ 1328 static int smallGap(int *R){ 1329 return R[3]<=2 || R[3]<=(R[1]+R[2]+R[4]+R[5])/8; 1330 } 1331 1332 /* 1333 ** Given a diff context in which the aEdit[] array has been filled 1334 ** in, compute a side-by-side diff into pOut. 1335 */ 1336 static void sbsDiff( 1337 DContext *p, /* The computed diff */ 1338 Blob *pOut, /* Write the results here */ 1339 ReCompiled *pRe, /* Only show changes that match this regex */ 1340 u64 diffFlags /* Flags controlling the diff */ 1341 ){ 1342 DLine *A; /* Left side of the diff */ 1343 DLine *B; /* Right side of the diff */ 1344 int a = 0; /* Index of next line in A[] */ 1345 int b = 0; /* Index of next line in B[] */ 1346 int *R; /* Array of COPY/DELETE/INSERT triples */ 1347 int r; /* Index into R[] */ 1348 int nr; /* Number of COPY/DELETE/INSERT triples to process */ 1349 int mxr; /* Maximum value for r */ 1350 int na, nb; /* Number of lines shown from A and B */ 1351 int i, j; /* Loop counters */ 1352 int m, ma, mb;/* Number of lines to output */ 1353 int skip; /* Number of lines to skip */ 1354 int nChunk = 0; /* Number of chunks of diff output seen so far */ 1355 SbsLine s; /* Output line buffer */ 1356 int nContext; /* Lines of context above and below each change */ 1357 int showDivider = 0; /* True to show the divider */ 1358 1359 memset(&s, 0, sizeof(s)); 1360 s.width = diff_width(diffFlags); 1361 s.zLine = fossil_malloc( 15*s.width + 200 ); 1362 if( s.zLine==0 ) return; 1363 nContext = diff_context_lines(diffFlags); 1364 s.escHtml = (diffFlags & DIFF_HTML)!=0; 1365 s.pRe = pRe; 1366 s.iStart = -1; 1367 s.iStart2 = 0; 1368 s.iEnd = -1; 1369 A = p->aFrom; 1370 B = p->aTo; 1371 R = p->aEdit; 1372 mxr = p->nEdit; 1373 while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } 1374 for(r=0; r<mxr; r += 3*nr){ 1375 /* Figure out how many triples to show in a single block */ 1376 for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){} 1377 /* printf("r=%d nr=%d\n", r, nr); */ 1378 1379 /* If there is a regex, skip this block (generate no diff output) 1380 ** if the regex matches or does not match both insert and delete. 1381 ** Only display the block if one side matches but the other side does 1382 ** not. 1383 */ 1384 if( pRe ){ 1385 int hideBlock = 1; 1386 int xa = a, xb = b; 1387 for(i=0; hideBlock && i<nr; i++){ 1388 int c1, c2; 1389 xa += R[r+i*3]; 1390 xb += R[r+i*3]; 1391 c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]); 1392 c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]); 1393 hideBlock = c1==c2; 1394 xa += R[r+i*3+1]; 1395 xb += R[r+i*3+2]; 1396 } 1397 if( hideBlock ){ 1398 a = xa; 1399 b = xb; 1400 continue; 1401 } 1402 } 1403 1404 /* For the current block comprising nr triples, figure out 1405 ** how many lines of A and B are to be displayed 1406 */ 1407 if( R[r]>nContext ){ 1408 na = nb = nContext; 1409 skip = R[r] - nContext; 1410 }else{ 1411 na = nb = R[r]; 1412 skip = 0; 1413 } 1414 for(i=0; i<nr; i++){ 1415 na += R[r+i*3+1]; 1416 nb += R[r+i*3+2]; 1417 } 1418 if( R[r+nr*3]>nContext ){ 1419 na += nContext; 1420 nb += nContext; 1421 }else{ 1422 na += R[r+nr*3]; 1423 nb += R[r+nr*3]; 1424 } 1425 for(i=1; i<nr; i++){ 1426 na += R[r+i*3]; 1427 nb += R[r+i*3]; 1428 } 1429 1430 /* Draw the separator between blocks */ 1431 if( showDivider ){ 1432 if( s.escHtml ){ 1433 blob_appendf(pOut, "<span class=\"diffhr\">%.*c</span>\n", 1434 s.width*2+16, '.'); 1435 }else{ 1436 blob_appendf(pOut, "%.*c\n", s.width*2+16, '.'); 1437 } 1438 } 1439 showDivider = 1; 1440 nChunk++; 1441 if( s.escHtml ){ 1442 blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk); 1443 } 1444 1445 /* Show the initial common area */ 1446 a += skip; 1447 b += skip; 1448 m = R[r] - skip; 1449 for(j=0; j<m; j++){ 1450 s.n = 0; 1451 sbsWriteLineno(&s, a+j); 1452 s.iStart = s.iEnd = -1; 1453 sbsWriteText(&s, &A[a+j], SBS_PAD); 1454 sbsWrite(&s, " ", 3); 1455 sbsWriteLineno(&s, b+j); 1456 sbsWriteText(&s, &B[b+j], SBS_NEWLINE); 1457 blob_append(pOut, s.zLine, s.n); 1458 } 1459 a += m; 1460 b += m; 1461 1462 /* Show the differences */ 1463 for(i=0; i<nr; i++){ 1464 unsigned char *alignment; 1465 ma = R[r+i*3+1]; /* Lines on left but not on right */ 1466 mb = R[r+i*3+2]; /* Lines on right but not on left */ 1467 1468 /* If the gap between the current diff and then next diff within the 1469 ** same block is not too great, then render them as if they are a 1470 ** single diff. */ 1471 while( i<nr-1 && smallGap(&R[r+i*3]) ){ 1472 i++; 1473 m = R[r+i*3]; 1474 ma += R[r+i*3+1] + m; 1475 mb += R[r+i*3+2] + m; 1476 } 1477 1478 alignment = sbsAlignment(&A[a], ma, &B[b], mb); 1479 for(j=0; ma+mb>0; j++){ 1480 if( alignment[j]==1 ){ 1481 /* Delete one line from the left */ 1482 s.n = 0; 1483 sbsWriteLineno(&s, a); 1484 s.iStart = 0; 1485 s.zStart = "<span class=\"diffrm\">"; 1486 s.iEnd = LENGTH(&A[a]); 1487 sbsWriteText(&s, &A[a], SBS_PAD); 1488 if( s.escHtml ){ 1489 sbsWrite(&s, " &lt;\n", 6); 1490 }else{ 1491 sbsWrite(&s, " <\n", 3); 1492 } 1493 blob_append(pOut, s.zLine, s.n); 1494 assert( ma>0 ); 1495 ma--; 1496 a++; 1497 }else if( alignment[j]==3 ){ 1498 /* The left line is changed into the right line */ 1499 s.n = 0; 1500 sbsWriteLineChange(&s, &A[a], a, &B[b], b); 1501 blob_append(pOut, s.zLine, s.n); 1502 assert( ma>0 && mb>0 ); 1503 ma--; 1504 mb--; 1505 a++; 1506 b++; 1507 }else if( alignment[j]==2 ){ 1508 /* Insert one line on the right */ 1509 s.n = 0; 1510 sbsWriteSpace(&s, s.width + 7); 1511 if( s.escHtml ){ 1512 sbsWrite(&s, " &gt; ", 6); 1513 }else{ 1514 sbsWrite(&s, " > ", 3); 1515 } 1516 sbsWriteLineno(&s, b); 1517 s.iStart = 0; 1518 s.zStart = "<span class=\"diffadd\">"; 1519 s.iEnd = LENGTH(&B[b]); 1520 sbsWriteText(&s, &B[b], SBS_NEWLINE); 1521 blob_append(pOut, s.zLine, s.n); 1522 assert( mb>0 ); 1523 mb--; 1524 b++; 1525 }else{ 1526 /* Delete from the left and insert on the right */ 1527 s.n = 0; 1528 sbsWriteLineno(&s, a); 1529 s.iStart = 0; 1530 s.zStart = "<span class=\"diffrm\">"; 1531 s.iEnd = LENGTH(&A[a]); 1532 sbsWriteText(&s, &A[a], SBS_PAD); 1533 sbsWrite(&s, " | ", 3); 1534 sbsWriteLineno(&s, b); 1535 s.iStart = 0; 1536 s.zStart = "<span class=\"diffadd\">"; 1537 s.iEnd = LENGTH(&B[b]); 1538 sbsWriteText(&s, &B[b], SBS_NEWLINE); 1539 blob_append(pOut, s.zLine, s.n); 1540 ma--; 1541 mb--; 1542 a++; 1543 b++; 1544 } 1545 1546 } 1547 fossil_free(alignment); 1548 if( i<nr-1 ){ 1549 m = R[r+i*3+3]; 1550 for(j=0; j<m; j++){ 1551 s.n = 0; 1552 sbsWriteLineno(&s, a+j); 1553 s.iStart = s.iEnd = -1; 1554 sbsWriteText(&s, &A[a+j], SBS_PAD); 1555 sbsWrite(&s, " ", 3); 1556 sbsWriteLineno(&s, b+j); 1557 sbsWriteText(&s, &B[b+j], SBS_NEWLINE); 1558 blob_append(pOut, s.zLine, s.n); 1559 } 1560 b += m; 1561 a += m; 1562 } 1563 } 1564 1565 /* Show the final common area */ 1566 assert( nr==i ); 1567 m = R[r+nr*3]; 1568 if( m>nContext ) m = nContext; 1569 for(j=0; j<m; j++){ 1570 s.n = 0; 1571 sbsWriteLineno(&s, a+j); 1572 s.iStart = s.iEnd = -1; 1573 sbsWriteText(&s, &A[a+j], SBS_PAD); 1574 sbsWrite(&s, " ", 3); 1575 sbsWriteLineno(&s, b+j); 1576 sbsWriteText(&s, &B[b+j], SBS_NEWLINE); 1577 blob_append(pOut, s.zLine, s.n); 1578 } 1579 } 1580 free(s.zLine); 1581 } 1582 1583 /* 1584 ** Compute the optimal longest common subsequence (LCS) using an 1585 ** exhaustive search. This version of the LCS is only used for 1586 ** shorter input strings since runtime is O(N*N) where N is the 1587 ** input string length. 1588 */ 1589 static void optimalLCS( 1590 DContext *p, /* Two files being compared */ 1591 int iS1, int iE1, /* Range of lines in p->aFrom[] */ 1592 int iS2, int iE2, /* Range of lines in p->aTo[] */ 1593 int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ 1594 int *piSY, int *piEY /* Write p->aTo[] common segment here */ 1595 ){ 1596 int mxLength = 0; /* Length of longest common subsequence */ 1597 int i, j; /* Loop counters */ 1598 int k; /* Length of a candidate subsequence */ 1599 int iSXb = iS1; /* Best match so far */ 1600 int iSYb = iS2; /* Best match so far */ 1601 1602 for(i=iS1; i<iE1-mxLength; i++){ 1603 for(j=iS2; j<iE2-mxLength; j++){ 1604 if( !same_dline(&p->aFrom[i], &p->aTo[j]) ) continue; 1605 if( mxLength && !same_dline(&p->aFrom[i+mxLength], &p->aTo[j+mxLength]) ){ 1606 continue; 1607 } 1608 k = 1; 1609 while( i+k<iE1 && j+k<iE2 && same_dline(&p->aFrom[i+k],&p->aTo[j+k]) ){ 1610 k++; 1611 } 1612 if( k>mxLength ){ 1613 iSXb = i; 1614 iSYb = j; 1615 mxLength = k; 1616 } 1617 } 1618 } 1619 *piSX = iSXb; 1620 *piEX = iSXb + mxLength; 1621 *piSY = iSYb; 1622 *piEY = iSYb + mxLength; 1623 } 1624 1625 /* 1626 ** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[] 1627 ** file and lines iS2 through iE2-1 of the aTo[] file. Locate a sequence 1628 ** of lines in these two blocks that are exactly the same. Return 1629 ** the bounds of the matching sequence. 1630 ** 1631 ** If there are two or more possible answers of the same length, the 1632 ** returned sequence should be the one closest to the center of the 1633 ** input range. 1634 ** 1635 ** Ideally, the common sequence should be the longest possible common 1636 ** sequence. However, an exact computation of LCS is O(N*N) which is 1637 ** way too slow for larger files. So this routine uses an O(N) 1638 ** heuristic approximation based on hashing that usually works about 1639 ** as well. But if the O(N) algorithm doesn't get a good solution 1640 ** and N is not too large, we fall back to an exact solution by 1641 ** calling optimalLCS(). 1642 */ 1643 static void longestCommonSequence( 1644 DContext *p, /* Two files being compared */ 1645 int iS1, int iE1, /* Range of lines in p->aFrom[] */ 1646 int iS2, int iE2, /* Range of lines in p->aTo[] */ 1647 int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ 1648 int *piSY, int *piEY /* Write p->aTo[] common segment here */ 1649 ){ 1650 double bestScore = -1e30; /* Best score seen so far */ 1651 int i, j, k; /* Loop counters */ 1652 int n; /* Loop limit */ 1653 DLine *pA, *pB; /* Pointers to lines */ 1654 int iSX, iSY, iEX, iEY; /* Current match */ 1655 double score; /* Current score */ 1656 int skew; /* How lopsided is the match */ 1657 int dist; /* Distance of match from center */ 1658 int mid; /* Center of the span */ 1659 int iSXb, iSYb, iEXb, iEYb; /* Best match so far */ 1660 int iSXp, iSYp, iEXp, iEYp; /* Previous match */ 1661 1662 1663 iSXb = iSXp = iS1; 1664 iEXb = iEXp = iS1; 1665 iSYb = iSYp = iS2; 1666 iEYb = iEYp = iS2; 1667 mid = (iE1 + iS1)/2; 1668 for(i=iS1; i<iE1; i++){ 1669 int limit = 0; 1670 j = p->aTo[p->aFrom[i].h % p->nTo].iHash; 1671 while( j>0 1672 && (j-1<iS2 || j>=iE2 || !same_dline(&p->aFrom[i], &p->aTo[j-1])) 1673 ){ 1674 if( limit++ > 10 ){ 1675 j = 0; 1676 break; 1677 } 1678 j = p->aTo[j-1].iNext; 1679 } 1680 if( j==0 ) continue; 1681 assert( i>=iSXb && i>=iSXp ); 1682 if( i<iEXb && j>=iSYb && j<iEYb ) continue; 1683 if( i<iEXp && j>=iSYp && j<iEYp ) continue; 1684 iSX = i; 1685 iSY = j-1; 1686 pA = &p->aFrom[iSX-1]; 1687 pB = &p->aTo[iSY-1]; 1688 n = minInt(iSX-iS1, iSY-iS2); 1689 for(k=0; k<n && same_dline(pA,pB); k++, pA--, pB--){} 1690 iSX -= k; 1691 iSY -= k; 1692 iEX = i+1; 1693 iEY = j; 1694 pA = &p->aFrom[iEX]; 1695 pB = &p->aTo[iEY]; 1696 n = minInt(iE1-iEX, iE2-iEY); 1697 for(k=0; k<n && same_dline(pA,pB); k++, pA++, pB++){} 1698 iEX += k; 1699 iEY += k; 1700 skew = (iSX-iS1) - (iSY-iS2); 1701 if( skew<0 ) skew = -skew; 1702 dist = (iSX+iEX)/2 - mid; 1703 if( dist<0 ) dist = -dist; 1704 score = (iEX - iSX) - 0.05*skew - 0.05*dist; 1705 if( score>bestScore ){ 1706 bestScore = score; 1707 iSXb = iSX; 1708 iSYb = iSY; 1709 iEXb = iEX; 1710 iEYb = iEY; 1711 }else if( iEX>iEXp ){ 1712 iSXp = iSX; 1713 iSYp = iSY; 1714 iEXp = iEX; 1715 iEYp = iEY; 1716 } 1717 } 1718 if( iSXb==iEXb && (iE1-iS1)*(iE2-iS2)<400 ){ 1719 /* If no common sequence is found using the hashing heuristic and 1720 ** the input is not too big, use the expensive exact solution */ 1721 optimalLCS(p, iS1, iE1, iS2, iE2, piSX, piEX, piSY, piEY); 1722 }else{ 1723 *piSX = iSXb; 1724 *piSY = iSYb; 1725 *piEX = iEXb; 1726 *piEY = iEYb; 1727 } 1728 /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n", 1729 iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */ 1730 } 1731 1732 /* 1733 ** Expand the size of aEdit[] array to hold at least nEdit elements. 1734 */ 1735 static void expandEdit(DContext *p, int nEdit){ 1736 p->aEdit = fossil_realloc(p->aEdit, nEdit*sizeof(int)); 1737 p->nEditAlloc = nEdit; 1738 } 1739 1740 /* 1741 ** Append a new COPY/DELETE/INSERT triple. 1742 */ 1743 static void appendTriple(DContext *p, int nCopy, int nDel, int nIns){ 1744 /* printf("APPEND %d/%d/%d\n", nCopy, nDel, nIns); */ 1745 if( p->nEdit>=3 ){ 1746 if( p->aEdit[p->nEdit-1]==0 ){ 1747 if( p->aEdit[p->nEdit-2]==0 ){ 1748 p->aEdit[p->nEdit-3] += nCopy; 1749 p->aEdit[p->nEdit-2] += nDel; 1750 p->aEdit[p->nEdit-1] += nIns; 1751 return; 1752 } 1753 if( nCopy==0 ){ 1754 p->aEdit[p->nEdit-2] += nDel; 1755 p->aEdit[p->nEdit-1] += nIns; 1756 return; 1757 } 1758 } 1759 if( nCopy==0 && nDel==0 ){ 1760 p->aEdit[p->nEdit-1] += nIns; 1761 return; 1762 } 1763 } 1764 if( p->nEdit+3>p->nEditAlloc ){ 1765 expandEdit(p, p->nEdit*2 + 15); 1766 if( p->aEdit==0 ) return; 1767 } 1768 p->aEdit[p->nEdit++] = nCopy; 1769 p->aEdit[p->nEdit++] = nDel; 1770 p->aEdit[p->nEdit++] = nIns; 1771 } 1772 1773 /* 1774 ** Do a single step in the difference. Compute a sequence of 1775 ** copy/delete/insert steps that will convert lines iS1 through iE1-1 of 1776 ** the input into lines iS2 through iE2-1 of the output and write 1777 ** that sequence into the difference context. 1778 ** 1779 ** The algorithm is to find a block of common text near the middle 1780 ** of the two segments being diffed. Then recursively compute 1781 ** differences on the blocks before and after that common segment. 1782 ** Special cases apply if either input segment is empty or if the 1783 ** two segments have no text in common. 1784 */ 1785 static void diff_step(DContext *p, int iS1, int iE1, int iS2, int iE2){ 1786 int iSX, iEX, iSY, iEY; 1787 1788 if( iE1<=iS1 ){ 1789 /* The first segment is empty */ 1790 if( iE2>iS2 ){ 1791 appendTriple(p, 0, 0, iE2-iS2); 1792 } 1793 return; 1794 } 1795 if( iE2<=iS2 ){ 1796 /* The second segment is empty */ 1797 appendTriple(p, 0, iE1-iS1, 0); 1798 return; 1799 } 1800 1801 /* Find the longest matching segment between the two sequences */ 1802 longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY); 1803 1804 if( iEX>iSX ){ 1805 /* A common segment has been found. 1806 ** Recursively diff either side of the matching segment */ 1807 diff_step(p, iS1, iSX, iS2, iSY); 1808 if( iEX>iSX ){ 1809 appendTriple(p, iEX - iSX, 0, 0); 1810 } 1811 diff_step(p, iEX, iE1, iEY, iE2); 1812 }else{ 1813 /* The two segments have nothing in common. Delete the first then 1814 ** insert the second. */ 1815 appendTriple(p, 0, iE1-iS1, iE2-iS2); 1816 } 1817 } 1818 1819 /* 1820 ** Compute the differences between two files already loaded into 1821 ** the DContext structure. 1822 ** 1823 ** A divide and conquer technique is used. We look for a large 1824 ** block of common text that is in the middle of both files. Then 1825 ** compute the difference on those parts of the file before and 1826 ** after the common block. This technique is fast, but it does 1827 ** not necessarily generate the minimum difference set. On the 1828 ** other hand, we do not need a minimum difference set, only one 1829 ** that makes sense to human readers, which this algorithm does. 1830 ** 1831 ** Any common text at the beginning and end of the two files is 1832 ** removed before starting the divide-and-conquer algorithm. 1833 */ 1834 static void diff_all(DContext *p){ 1835 int mnE, iS, iE1, iE2; 1836 1837 /* Carve off the common header and footer */ 1838 iE1 = p->nFrom; 1839 iE2 = p->nTo; 1840 while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){ 1841 iE1--; 1842 iE2--; 1843 } 1844 mnE = iE1<iE2 ? iE1 : iE2; 1845 for(iS=0; iS<mnE && same_dline(&p->aFrom[iS],&p->aTo[iS]); iS++){} 1846 1847 /* do the difference */ 1848 if( iS>0 ){ 1849 appendTriple(p, iS, 0, 0); 1850 } 1851 diff_step(p, iS, iE1, iS, iE2); 1852 if( iE1<p->nFrom ){ 1853 appendTriple(p, p->nFrom - iE1, 0, 0); 1854 } 1855 1856 /* Terminate the COPY/DELETE/INSERT triples with three zeros */ 1857 expandEdit(p, p->nEdit+3); 1858 if( p->aEdit ){ 1859 p->aEdit[p->nEdit++] = 0; 1860 p->aEdit[p->nEdit++] = 0; 1861 p->aEdit[p->nEdit++] = 0; 1862 } 1863 } 1864 1865 /* 1866 ** Attempt to shift insertion or deletion blocks so that they begin and 1867 ** end on lines that are pure whitespace. In other words, try to transform 1868 ** this: 1869 ** 1870 ** int func1(int x){ 1871 ** return x*10; 1872 ** +} 1873 ** + 1874 ** +int func2(int x){ 1875 ** + return x*20; 1876 ** } 1877 ** 1878 ** int func3(int x){ 1879 ** return x/5; 1880 ** } 1881 ** 1882 ** Into one of these: 1883 ** 1884 ** int func1(int x){ int func1(int x){ 1885 ** return x*10; return x*10; 1886 ** } } 1887 ** + 1888 ** +int func2(int x){ +int func2(int x){ 1889 ** + return x*20; + return x*20; 1890 ** +} +} 1891 ** + 1892 ** int func3(int x){ int func3(int x){ 1893 ** return x/5; return x/5; 1894 ** } } 1895 */ 1896 static void diff_optimize(DContext *p){ 1897 int r; /* Index of current triple */ 1898 int lnFrom; /* Line number in p->aFrom */ 1899 int lnTo; /* Line number in p->aTo */ 1900 int cpy, del, ins; 1901 1902 lnFrom = lnTo = 0; 1903 for(r=0; r<p->nEdit; r += 3){ 1904 cpy = p->aEdit[r]; 1905 del = p->aEdit[r+1]; 1906 ins = p->aEdit[r+2]; 1907 lnFrom += cpy; 1908 lnTo += cpy; 1909 1910 /* Shift insertions toward the beginning of the file */ 1911 while( cpy>0 && del==0 && ins>0 ){ 1912 DLine *pTop = &p->aFrom[lnFrom-1]; /* Line before start of insert */ 1913 DLine *pBtm = &p->aTo[lnTo+ins-1]; /* Last line inserted */ 1914 if( same_dline(pTop, pBtm)==0 ) break; 1915 if( LENGTH(pTop+1)+LENGTH(pBtm)<=LENGTH(pTop)+LENGTH(pBtm-1) ) break; 1916 lnFrom--; 1917 lnTo--; 1918 p->aEdit[r]--; 1919 p->aEdit[r+3]++; 1920 cpy--; 1921 } 1922 1923 /* Shift insertions toward the end of the file */ 1924 while( r+3<p->nEdit && p->aEdit[r+3]>0 && del==0 && ins>0 ){ 1925 DLine *pTop = &p->aTo[lnTo]; /* First line inserted */ 1926 DLine *pBtm = &p->aTo[lnTo+ins]; /* First line past end of insert */ 1927 if( same_dline(pTop, pBtm)==0 ) break; 1928 if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop+1)+LENGTH(pBtm) ) break; 1929 lnFrom++; 1930 lnTo++; 1931 p->aEdit[r]++; 1932 p->aEdit[r+3]--; 1933 cpy++; 1934 } 1935 1936 /* Shift deletions toward the beginning of the file */ 1937 while( cpy>0 && del>0 && ins==0 ){ 1938 DLine *pTop = &p->aFrom[lnFrom-1]; /* Line before start of delete */ 1939 DLine *pBtm = &p->aFrom[lnFrom+del-1]; /* Last line deleted */ 1940 if( same_dline(pTop, pBtm)==0 ) break; 1941 if( LENGTH(pTop+1)+LENGTH(pBtm)<=LENGTH(pTop)+LENGTH(pBtm-1) ) break; 1942 lnFrom--; 1943 lnTo--; 1944 p->aEdit[r]--; 1945 p->aEdit[r+3]++; 1946 cpy--; 1947 } 1948 1949 /* Shift deletions toward the end of the file */ 1950 while( r+3<p->nEdit && p->aEdit[r+3]>0 && del>0 && ins==0 ){ 1951 DLine *pTop = &p->aFrom[lnFrom]; /* First line deleted */ 1952 DLine *pBtm = &p->aFrom[lnFrom+del]; /* First line past end of delete */ 1953 if( same_dline(pTop, pBtm)==0 ) break; 1954 if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop)+LENGTH(pBtm) ) break; 1955 lnFrom++; 1956 lnTo++; 1957 p->aEdit[r]++; 1958 p->aEdit[r+3]--; 1959 cpy++; 1960 } 1961 1962 lnFrom += del; 1963 lnTo += ins; 1964 } 1965 } 1966 1967 /* 1968 ** Extract the number of lines of context from diffFlags. Supply an 1969 ** appropriate default if no context width is specified. 1970 */ 1971 int diff_context_lines(u64 diffFlags){ 1972 int n = diffFlags & DIFF_CONTEXT_MASK; 1973 if( n==0 && (diffFlags & DIFF_CONTEXT_EX)==0 ) n = 5; 1974 return n; 1975 } 1976 1977 /* 1978 ** Extract the width of columns for side-by-side diff. Supply an 1979 ** appropriate default if no width is given. 1980 */ 1981 int diff_width(u64 diffFlags){ 1982 int w = (diffFlags & DIFF_WIDTH_MASK)/(DIFF_CONTEXT_MASK+1); 1983 if( w==0 ) w = 80; 1984 return w; 1985 } 1986 1987 /* 1988 ** Generate a report of the differences between files pA and pB. 1989 ** If pOut is not NULL then a unified diff is appended there. It 1990 ** is assumed that pOut has already been initialized. If pOut is 1991 ** NULL, then a pointer to an array of integers is returned. 1992 ** The integers come in triples. For each triple, 1993 ** the elements are the number of lines copied, the number of 1994 ** lines deleted, and the number of lines inserted. The vector 1995 ** is terminated by a triple of all zeros. 1996 ** 1997 ** This diff utility does not work on binary files. If a binary 1998 ** file is encountered, 0 is returned and pOut is written with 1999 ** text "cannot compute difference between binary files". 2000 */ 2001 int *text_diff( 2002 Blob *pA_Blob, /* FROM file */ 2003 Blob *pB_Blob, /* TO file */ 2004 Blob *pOut, /* Write diff here if not NULL */ 2005 ReCompiled *pRe, /* Only output changes where this Regexp matches */ 2006 u64 diffFlags /* DIFF_* flags defined above */ 2007 ){ 2008 int ignoreEolWs; /* Ignore whitespace at the end of lines */ 2009 DContext c; 2010 2011 if( diffFlags & DIFF_INVERT ){ 2012 Blob *pTemp = pA_Blob; 2013 pA_Blob = pB_Blob; 2014 pB_Blob = pTemp; 2015 } 2016 ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0; 2017 2018 /* Prepare the input files */ 2019 memset(&c, 0, sizeof(c)); 2020 c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob), 2021 &c.nFrom, ignoreEolWs); 2022 c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob), 2023 &c.nTo, ignoreEolWs); 2024 if( c.aFrom==0 || c.aTo==0 ){ 2025 fossil_free(c.aFrom); 2026 fossil_free(c.aTo); 2027 if( pOut ){ 2028 blob_appendf(pOut, DIFF_CANNOT_COMPUTE_BINARY); 2029 } 2030 return 0; 2031 } 2032 2033 /* Compute the difference */ 2034 diff_all(&c); 2035 if( (diffFlags & DIFF_NOTTOOBIG)!=0 ){ 2036 int i, m, n; 2037 int *a = c.aEdit; 2038 int mx = c.nEdit; 2039 for(i=m=n=0; i<mx; i+=3){ m += a[i]; n += a[i+1]+a[i+2]; } 2040 if( n>10000 ){ 2041 fossil_free(c.aFrom); 2042 fossil_free(c.aTo); 2043 fossil_free(c.aEdit); 2044 if( diffFlags & DIFF_HTML ){ 2045 blob_append(pOut, DIFF_TOO_MANY_CHANGES_HTML, -1); 2046 }else{ 2047 blob_append(pOut, DIFF_TOO_MANY_CHANGES_TXT, -1); 2048 } 2049 return 0; 2050 } 2051 } 2052 if( (diffFlags & DIFF_NOOPT)==0 ){ 2053 diff_optimize(&c); 2054 } 2055 2056 if( pOut ){ 2057 /* Compute a context or side-by-side diff into pOut */ 2058 if( diffFlags & DIFF_SIDEBYSIDE ){ 2059 sbsDiff(&c, pOut, pRe, diffFlags); 2060 }else{ 2061 contextDiff(&c, pOut, pRe, diffFlags); 2062 } 2063 fossil_free(c.aFrom); 2064 fossil_free(c.aTo); 2065 fossil_free(c.aEdit); 2066 return 0; 2067 }else{ 2068 /* If a context diff is not requested, then return the 2069 ** array of COPY/DELETE/INSERT triples. 2070 */ 2071 free(c.aFrom); 2072 free(c.aTo); 2073 return c.aEdit; 2074 } 2075 } 2076 2077 /* 2078 ** Process diff-related command-line options and return an appropriate 2079 ** "diffFlags" integer. 2080 ** 2081 ** --brief Show filenames only DIFF_BRIEF 2082 ** --context|-c N N lines of context. DIFF_CONTEXT_MASK 2083 ** --html Format for HTML DIFF_HTML 2084 ** --invert Invert the diff DIFF_INVERT 2085 ** --linenum|-n Show line numbers DIFF_LINENO 2086 ** --noopt Disable optimization DIFF_NOOPT 2087 ** --side-by-side|-y Side-by-side diff. DIFF_SIDEBYSIDE 2088 ** --unified Unified diff. ~DIFF_SIDEBYSIDE 2089 ** --width|-W N N character lines. DIFF_WIDTH_MASK 2090 */ 2091 u64 diff_options(void){ 2092 u64 diffFlags = 0; 2093 const char *z; 2094 int f; 2095 if( find_option("side-by-side","y",0)!=0 ) diffFlags |= DIFF_SIDEBYSIDE; 2096 if( find_option("unified",0,0)!=0 ) diffFlags &= ~DIFF_SIDEBYSIDE; 2097 if( (z = find_option("context","c",1))!=0 && (f = atoi(z))>=0 ){ 2098 if( f > DIFF_CONTEXT_MASK ) f = DIFF_CONTEXT_MASK; 2099 diffFlags |= f + DIFF_CONTEXT_EX; 2100 } 2101 if( (z = find_option("width","W",1))!=0 && (f = atoi(z))>0 ){ 2102 f *= DIFF_CONTEXT_MASK+1; 2103 if( f > DIFF_WIDTH_MASK ) f = DIFF_CONTEXT_MASK; 2104 diffFlags |= f; 2105 } 2106 if( find_option("html",0,0)!=0 ) diffFlags |= DIFF_HTML; 2107 if( find_option("linenum","n",0)!=0 ) diffFlags |= DIFF_LINENO; 2108 if( find_option("noopt",0,0)!=0 ) diffFlags |= DIFF_NOOPT; 2109 if( find_option("invert",0,0)!=0 ) diffFlags |= DIFF_INVERT; 2110 if( find_option("brief",0,0)!=0 ) diffFlags |= DIFF_BRIEF; 2111 return diffFlags; 2112 } 2113 2114 /* 2115 ** COMMAND: test-rawdiff 2116 */ 2117 void test_rawdiff_cmd(void){ 2118 Blob a, b; 2119 int r; 2120 int i; 2121 int *R; 2122 u64 diffFlags = diff_options(); 2123 if( g.argc<4 ) usage("FILE1 FILE2 ..."); 2124 blob_read_from_file(&a, g.argv[2]); 2125 for(i=3; i<g.argc; i++){ 2126 if( i>3 ) fossil_print("-------------------------------\n"); 2127 blob_read_from_file(&b, g.argv[i]); 2128 R = text_diff(&a, &b, 0, 0, diffFlags); 2129 for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){ 2130 fossil_print(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]); 2131 } 2132 /* free(R); */ 2133 blob_reset(&b); 2134 } 2135 } 2136 2137 /* 2138 ** COMMAND: test-diff 2139 ** 2140 ** Usage: %fossil [options] FILE1 FILE2 2141 ** 2142 ** Print the difference between two files. The usual diff options apply. 2143 */ 2144 void test_diff_cmd(void){ 2145 Blob a, b, out; 2146 u64 diffFlag; 2147 const char *zRe; /* Regex filter for diff output */ 2148 ReCompiled *pRe = 0; /* Regex filter for diff output */ 2149 2150 if( find_option("tk",0,0)!=0 ){ 2151 diff_tk("test-diff", 2); 2152 return; 2153 } 2154 find_option("i",0,0); 2155 zRe = find_option("regexp","e",1); 2156 if( zRe ){ 2157 const char *zErr = re_compile(&pRe, zRe, 0); 2158 if( zErr ) fossil_fatal("regex error: %s", zErr); 2159 } 2160 diffFlag = diff_options(); 2161 verify_all_options(); 2162 if( g.argc!=4 ) usage("FILE1 FILE2"); 2163 diff_print_filenames(g.argv[2], g.argv[3], diffFlag); 2164 blob_read_from_file(&a, g.argv[2]); 2165 blob_read_from_file(&b, g.argv[3]); 2166 blob_zero(&out); 2167 text_diff(&a, &b, &out, pRe, diffFlag); 2168 blob_write_to_file(&out, "-"); 2169 re_free(pRe); 2170 } 2171 2172 /************************************************************************** 2173 ** The basic difference engine is above. What follows is the annotation 2174 ** engine. Both are in the same file since they share many components. 2175 */ 2176 2177 /* 2178 ** The status of an annotation operation is recorded by an instance 2179 ** of the following structure. 2180 */ 2181 typedef struct Annotator Annotator; 2182 struct Annotator { 2183 DContext c; /* The diff-engine context */ 2184 struct AnnLine { /* Lines of the original files... */ 2185 const char *z; /* The text of the line */ 2186 short int n; /* Number of bytes (omitting trailing space and \n) */ 2187 short int iLevel; /* Level at which tag was set */ 2188 const char *zSrc; /* Tag showing origin of this line */ 2189 } *aOrig; 2190 int nOrig; /* Number of elements in aOrig[] */ 2191 int nNoSrc; /* Number of entries where aOrig[].zSrc==NULL */ 2192 int iLevel; /* Current level */ 2193 int nVers; /* Number of versions analyzed */ 2194 char **azVers; /* Names of versions analyzed */ 2195 }; 2196 2197 /* 2198 ** Initialize the annotation process by specifying the file that is 2199 ** to be annotated. The annotator takes control of the input Blob and 2200 ** will release it when it is finished with it. 2201 */ 2202 static int annotation_start(Annotator *p, Blob *pInput){ 2203 int i; 2204 2205 memset(p, 0, sizeof(*p)); 2206 p->c.aTo = break_into_lines(blob_str(pInput), blob_size(pInput),&p->c.nTo,1); 2207 if( p->c.aTo==0 ){ 2208 return 1; 2209 } 2210 p->aOrig = fossil_malloc( sizeof(p->aOrig[0])*p->c.nTo ); 2211 for(i=0; i<p->c.nTo; i++){ 2212 p->aOrig[i].z = p->c.aTo[i].z; 2213 p->aOrig[i].n = p->c.aTo[i].h & LENGTH_MASK; 2214 p->aOrig[i].zSrc = 0; 2215 } 2216 p->nOrig = p->c.nTo; 2217 return 0; 2218 } 2219 2220 /* 2221 ** The input pParent is the next most recent ancestor of the file 2222 ** being annotated. Do another step of the annotation. Return true 2223 ** if additional annotation is required. zPName is the tag to insert 2224 ** on each line of the file being annotated that was contributed by 2225 ** pParent. Memory to hold zPName is leaked. 2226 */ 2227 static int annotation_step(Annotator *p, Blob *pParent, char *zPName){ 2228 int i, j; 2229 int lnTo; 2230 int iPrevLevel; 2231 int iThisLevel; 2232 2233 /* Prepare the parent file to be diffed */ 2234 p->c.aFrom = break_into_lines(blob_str(pParent), blob_size(pParent), 2235 &p->c.nFrom, 1); 2236 if( p->c.aFrom==0 ){ 2237 return 1; 2238 } 2239 2240 /* Compute the differences going from pParent to the file being 2241 ** annotated. */ 2242 diff_all(&p->c); 2243 2244 /* Where new lines are inserted on this difference, record the 2245 ** zPName as the source of the new line. 2246 */ 2247 iPrevLevel = p->iLevel; 2248 p->iLevel++; 2249 iThisLevel = p->iLevel; 2250 for(i=lnTo=0; i<p->c.nEdit; i+=3){ 2251 struct AnnLine *x = &p->aOrig[lnTo]; 2252 for(j=0; j<p->c.aEdit[i]; j++, lnTo++, x++){ 2253 if( x->zSrc==0 || x->iLevel==iPrevLevel ){ 2254 x->zSrc = zPName; 2255 x->iLevel = iThisLevel; 2256 } 2257 } 2258 lnTo += p->c.aEdit[i+2]; 2259 } 2260 2261 /* Clear out the diff results */ 2262 fossil_free(p->c.aEdit); 2263 p->c.aEdit = 0; 2264 p->c.nEdit = 0; 2265 p->c.nEditAlloc = 0; 2266 2267 /* Clear out the from file */ 2268 free(p->c.aFrom); 2269 2270 /* Return no errors */ 2271 return 0; 2272 } 2273 2274 2275 /* 2276 ** COMMAND: test-annotate-step 2277 */ 2278 void test_annotate_step_cmd(void){ 2279 Blob orig, b; 2280 Annotator x; 2281 int i; 2282 2283 if( g.argc<4 ) usage("RID1 RID2 ..."); 2284 db_must_be_within_tree(); 2285 blob_zero(&b); 2286 content_get(name_to_rid(g.argv[2]), &orig); 2287 if( annotation_start(&x, &orig) ){ 2288 fossil_fatal("binary file"); 2289 } 2290 for(i=3; i<g.argc; i++){ 2291 blob_zero(&b); 2292 content_get(name_to_rid(g.argv[i]), &b); 2293 if( annotation_step(&x, &b, g.argv[i-1]) ){ 2294 fossil_fatal("binary file"); 2295 } 2296 } 2297 for(i=0; i<x.nOrig; i++){ 2298 const char *zSrc = x.aOrig[i].zSrc; 2299 if( zSrc==0 ) zSrc = g.argv[g.argc-1]; 2300 fossil_print("%10s: %.*s\n", zSrc, x.aOrig[i].n, x.aOrig[i].z); 2301 } 2302 } 2303 2304 /* Annotation flags */ 2305 #define ANN_FILE_VERS 0x01 /* Show file vers rather than commit vers */ 2306 #define ANN_FILE_ANCEST 0x02 /* Prefer check-ins in the ANCESTOR table */ 2307 2308 /* 2309 ** Compute a complete annotation on a file. The file is identified 2310 ** by its filename number (filename.fnid) and the baseline in which 2311 ** it was checked in (mlink.mid). 2312 */ 2313 static void annotate_file( 2314 Annotator *p, /* The annotator */ 2315 int fnid, /* The name of the file to be annotated */ 2316 int mid, /* Use the version of the file in this check-in */ 2317 int webLabel, /* Use web-style annotations if true */ 2318 int iLimit, /* Limit the number of levels if greater than zero */ 2319 int annFlags /* Flags to alter the annotation */ 2320 ){ 2321 Blob toAnnotate; /* Text of the final (mid) version of the file */ 2322 Blob step; /* Text of previous revision */ 2323 int rid; /* Artifact ID of the file being annotated */ 2324 char *zLabel; /* Label to apply to a line */ 2325 Stmt q; /* Query returning all ancestor versions */ 2326 Stmt ins; /* Inserts into the temporary VSEEN table */ 2327 int cnt = 0; /* Number of versions examined */ 2328 2329 /* Initialize the annotation */ 2330 rid = db_int(0, "SELECT fid FROM mlink WHERE mid=%d AND fnid=%d",mid,fnid); 2331 if( rid==0 ){ 2332 fossil_panic("file #%d is unchanged in manifest #%d", fnid, mid); 2333 } 2334 if( !content_get(rid, &toAnnotate) ){ 2335 fossil_panic("unable to retrieve content of artifact #%d", rid); 2336 } 2337 if( iLimit<=0 ) iLimit = 1000000000; 2338 annotation_start(p, &toAnnotate); 2339 db_begin_transaction(); 2340 db_multi_exec( 2341 "CREATE TEMP TABLE IF NOT EXISTS vseen(rid INTEGER PRIMARY KEY);" 2342 "DELETE FROM vseen;" 2343 ); 2344 2345 db_prepare(&ins, "INSERT OR IGNORE INTO vseen(rid) VALUES(:rid)"); 2346 db_prepare(&q, 2347 "SELECT (SELECT uuid FROM blob WHERE rid=mlink.%s)," 2348 " date(event.mtime)," 2349 " coalesce(event.euser,event.user)," 2350 " mlink.pid" 2351 " FROM mlink, event" 2352 " WHERE mlink.fid=:rid" 2353 " AND event.objid=mlink.mid" 2354 " AND mlink.pid NOT IN vseen" 2355 " ORDER BY %s event.mtime", 2356 (annFlags & ANN_FILE_VERS)!=0 ? "fid" : "mid", 2357 (annFlags & ANN_FILE_ANCEST)!=0 ? 2358 "(mlink.mid IN (SELECT rid FROM ancestor)) DESC,":"" 2359 ); 2360 2361 db_bind_int(&q, ":rid", rid); 2362 if( iLimit==0 ) iLimit = 1000000000; 2363 while( rid && iLimit>cnt && db_step(&q)==SQLITE_ROW ){ 2364 const char *zUuid = db_column_text(&q, 0); 2365 const char *zDate = db_column_text(&q, 1); 2366 const char *zUser = db_column_text(&q, 2); 2367 int prevId = db_column_int(&q, 3); 2368 if( webLabel ){ 2369 zLabel = mprintf( 2370 "<a href='%R/info/%s' target='infowindow'>%.10s</a> %s %13.13s", 2371 zUuid, zUuid, zDate, zUser 2372 ); 2373 }else{ 2374 zLabel = mprintf("%.10s %s %13.13s", zUuid, zDate, zUser); 2375 } 2376 p->nVers++; 2377 p->azVers = fossil_realloc(p->azVers, p->nVers*sizeof(p->azVers[0]) ); 2378 p->azVers[p->nVers-1] = zLabel; 2379 content_get(rid, &step); 2380 annotation_step(p, &step, zLabel); 2381 db_bind_int(&ins, ":rid", rid); 2382 db_step(&ins); 2383 db_reset(&ins); 2384 blob_reset(&step); 2385 db_reset(&q); 2386 rid = prevId; 2387 db_bind_int(&q, ":rid", prevId); 2388 cnt++; 2389 } 2390 db_finalize(&q); 2391 db_finalize(&ins); 2392 db_end_transaction(0); 2393 } 2394 2395 /* 2396 ** WEBPAGE: annotate 2397 ** 2398 ** Query parameters: 2399 ** 2400 ** checkin=ID The manifest ID at which to start the annotation 2401 ** filename=FILENAME The filename. 2402 */ 2403 void annotation_page(void){ 2404 int mid; 2405 int fnid; 2406 int i; 2407 int iLimit; 2408 int annFlags = ANN_FILE_ANCEST; 2409 int showLn = 0; /* True if line numbers should be shown */ 2410 char zLn[10]; /* Line number buffer */ 2411 char zFormat[10]; /* Format string for line numbers */ 2412 Annotator ann; 2413 2414 showLn = P("ln")!=0; 2415 login_check_credentials(); 2416 if( !g.perm.Read ){ login_needed(); return; } 2417 mid = name_to_typed_rid(PD("checkin","0"),"ci"); 2418 fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", P("filename")); 2419 if( mid==0 || fnid==0 ){ fossil_redirect_home(); } 2420 iLimit = atoi(PD("limit","-1")); 2421 if( !db_exists("SELECT 1 FROM mlink WHERE mid=%d AND fnid=%d",mid,fnid) ){ 2422 fossil_redirect_home(); 2423 } 2424 compute_direct_ancestors(mid, 10000000); 2425 style_header("File Annotation"); 2426 if( P("filevers") ) annFlags |= ANN_FILE_VERS; 2427 annotate_file(&ann, fnid, mid, g.perm.Hyperlink, iLimit, annFlags); 2428 if( P("log") ){ 2429 int i; 2430 @ <h2>Versions analyzed:</h2> 2431 @ <ol> 2432 for(i=0; i<ann.nVers; i++){ 2433 @ <li><tt>%s(ann.azVers[i])</tt></li> 2434 } 2435 @ </ol> 2436 @ <hr> 2437 @ <h2>Annotation:</h2> 2438 } 2439 if( showLn ){ 2440 sqlite3_snprintf(sizeof(zLn), zLn, "%d", ann.nOrig+1); 2441 sqlite3_snprintf(sizeof(zFormat), zFormat, "%%%dd:", strlen(zLn)); 2442 }else{ 2443 zLn[0] = 0; 2444 } 2445 @ <pre> 2446 for(i=0; i<ann.nOrig; i++){ 2447 ((char*)ann.aOrig[i].z)[ann.aOrig[i].n] = 0; 2448 if( showLn ) sqlite3_snprintf(sizeof(zLn), zLn, zFormat, i+1); 2449 @ %s(ann.aOrig[i].zSrc):%s(zLn) %h(ann.aOrig[i].z) 2450 } 2451 @ </pre> 2452 style_footer(); 2453 } 2454 2455 /* 2456 ** COMMAND: annotate 2457 ** 2458 ** %fossil annotate ?OPTIONS? FILENAME 2459 ** 2460 ** Output the text of a file with markings to show when each line of 2461 ** the file was last modified. 2462 ** 2463 ** Options: 2464 ** --limit N Only look backwards in time by N versions 2465 ** --log List all versions analyzed 2466 ** --filevers Show file version numbers rather than check-in versions 2467 ** 2468 ** See also: info, finfo, timeline 2469 */ 2470 void annotate_cmd(void){ 2471 int fnid; /* Filename ID */ 2472 int fid; /* File instance ID */ 2473 int mid; /* Manifest where file was checked in */ 2474 int cid; /* Checkout ID */ 2475 Blob treename; /* FILENAME translated to canonical form */ 2476 char *zFilename; /* Canonical filename */ 2477 Annotator ann; /* The annotation of the file */ 2478 int i; /* Loop counter */ 2479 const char *zLimit; /* The value to the --limit option */ 2480 int iLimit; /* How far back in time to look */ 2481 int showLog; /* True to show the log */ 2482 int fileVers; /* Show file version instead of check-in versions */ 2483 int annFlags = 0; /* Flags to control annotation properties */ 2484 2485 zLimit = find_option("limit",0,1); 2486 if( zLimit==0 || zLimit[0]==0 ) zLimit = "-1"; 2487 iLimit = atoi(zLimit); 2488 showLog = find_option("log",0,0)!=0; 2489 fileVers = find_option("filevers",0,0)!=0; 2490 db_must_be_within_tree(); 2491 if( g.argc<3 ) { 2492 usage("FILENAME"); 2493 } 2494 file_tree_name(g.argv[2], &treename, 1); 2495 zFilename = blob_str(&treename); 2496 fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", zFilename); 2497 if( fnid==0 ){ 2498 fossil_fatal("no such file: %s", zFilename); 2499 } 2500 fid = db_int(0, "SELECT rid FROM vfile WHERE pathname=%Q", zFilename); 2501 if( fid==0 ){ 2502 fossil_fatal("not part of current checkout: %s", zFilename); 2503 } 2504 cid = db_lget_int("checkout", 0); 2505 if( cid == 0 ){ 2506 fossil_fatal("Not in a checkout"); 2507 } 2508 if( iLimit<=0 ) iLimit = 1000000000; 2509 compute_direct_ancestors(cid, iLimit); 2510 mid = db_int(0, "SELECT mlink.mid FROM mlink, ancestor " 2511 " WHERE mlink.fid=%d AND mlink.fnid=%d AND mlink.mid=ancestor.rid" 2512 " ORDER BY ancestor.generation ASC LIMIT 1", 2513 fid, fnid); 2514 if( mid==0 ){ 2515 fossil_panic("unable to find manifest"); 2516 } 2517 if( fileVers ) annFlags |= ANN_FILE_VERS; 2518 annFlags |= ANN_FILE_ANCEST; 2519 annotate_file(&ann, fnid, mid, 0, iLimit, annFlags); 2520 if( showLog ){ 2521 for(i=0; i<ann.nVers; i++){ 2522 printf("version %3d: %s\n", i+1, ann.azVers[i]); 2523 } 2524 printf("---------------------------------------------------\n"); 2525 } 2526 for(i=0; i<ann.nOrig; i++){ 2527 fossil_print("%s: %.*s\n", 2528 ann.aOrig[i].zSrc, ann.aOrig[i].n, ann.aOrig[i].z); 2529 } 2530 } 2531 2532 /* 2533 ** COMMAND: test-looks-like-utf 2534 ** 2535 ** Usage: %fossil test-looks-like-utf FILENAME 2536 ** 2537 ** FILENAME is the name of a file to check for textual content in the UTF-8 2538 ** and/or UTF-16 encodings. 2539 */ 2540 void looks_like_utf_test_cmd(void){ 2541 Blob blob; /* the contents of the specified file */ 2542 int fUtf8; /* return value of starts_with_utf8_bom() */ 2543 int fUtf16; /* return value of starts_with_utf16_bom() */ 2544 int fUnicode; /* return value of could_be_utf16() */ 2545 int lookFlags; /* output flags from looks_like_utf8/utf16() */ 2546 int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */ 2547 int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */ 2548 if( g.argc!=3 ) usage("FILENAME"); 2549 blob_read_from_file(&blob, g.argv[2]); 2550 fUtf8 = starts_with_utf8_bom(&blob, 0); 2551 fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16); 2552 fUnicode = could_be_utf16(&blob, &bRevUnicode); 2553 lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) : 2554 looks_like_utf8(&blob, 0); 2555 fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob)); 2556 fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no"); 2557 fossil_print("Starts with UTF-16 BOM: %s\n", 2558 fUtf16?(bRevUtf16?"reversed":"yes"):"no"); 2559 fossil_print("Looks like UTF-%s: %s\n",fUnicode?"16":"8", 2560 (lookFlags&LOOK_BINARY)?"no":"yes"); 2561 fossil_print("Has flag LOOK_NUL: %s\n",(lookFlags&LOOK_NUL)?"yes":"no"); 2562 fossil_print("Has flag LOOK_CR: %s\n",(lookFlags&LOOK_CR)?"yes":"no"); 2563 fossil_print("Has flag LOOK_LONE_CR: %s\n", 2564 (lookFlags&LOOK_LONE_CR)?"yes":"no"); 2565 fossil_print("Has flag LOOK_LF: %s\n",(lookFlags&LOOK_LF)?"yes":"no"); 2566 fossil_print("Has flag LOOK_LONE_LF: %s\n", 2567 (lookFlags&LOOK_LONE_LF)?"yes":"no"); 2568 fossil_print("Has flag LOOK_CRLF: %s\n",(lookFlags&LOOK_CRLF)?"yes":"no"); 2569 fossil_print("Has flag LOOK_LONG: %s\n",(lookFlags&LOOK_LONG)?"yes":"no"); 2570 fossil_print("Has flag LOOK_INVALID: %s\n", 2571 (lookFlags&LOOK_INVALID)?"yes":"no"); 2572 fossil_print("Has flag LOOK_ODD: %s\n",(lookFlags&LOOK_ODD)?"yes":"no"); 2573 fossil_print("Has flag LOOK_SHORT: %s\n",(lookFlags&LOOK_SHORT)?"yes":"no"); 2574 blob_reset(&blob); 2575 }