1 /*
2 ** Copyright (c) 2007 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
13 ** drh@hwaci.com
14 ** http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** This file contains code used to compute a "diff" between two
19 ** text files.
20 */
21 #include "config.h"
22 #include "diff.h"
23 #include <assert.h>
24
25
26 /*
27 ** Maximum length of a line in a text file. (8192)
28 */
29 #define LENGTH_MASK_SZ 13
30 #define LENGTH_MASK ((1<<LENGTH_MASK_SZ)-1)
31
32 /*
33 ** Information about each line of a file being diffed.
34 **
35 ** The lower LENGTH_MASK_SZ bits of the hash (DLine.h) are the length
36 ** of the line. If any line is longer than LENGTH_MASK characters,
37 ** the file is considered binary.
38 */
39 typedef struct DLine DLine;
40 struct DLine {
41 const char *z; /* The text of the line */
42 unsigned int h; /* Hash of the line */
43 unsigned int iNext; /* 1+(Index of next line with same the same hash) */
44
45 /* an array of DLine elements services two purposes. The fields
46 ** above are one per line of input text. But each entry is also
47 ** a bucket in a hash table, as follows: */
48 unsigned int iHash; /* 1+(first entry in the hash chain) */
49 };
50
51 /*
52 ** A context for running a diff.
53 */
54 typedef struct DContext DContext;
55 struct DContext {
56 int *aEdit; /* Array of copy/delete/insert triples */
57 int nEdit; /* Number of integers (3x num of triples) in aEdit[] */
58 int nEditAlloc; /* Space allocated for aEdit[] */
59 DLine *aFrom; /* File on left side of the diff */
60 int nFrom; /* Number of lines in aFrom[] */
61 DLine *aTo; /* File on right side of the diff */
62 int nTo; /* Number of lines in aTo[] */
63 };
64
65 /*
66 ** Return an array of DLine objects containing a pointer to the
67 ** start of each line and a hash of that line. The lower
68 ** bits of the hash store the length of each line.
69 **
70 ** Trailing whitespace is removed from each line. 2010-08-20: Not any
71 ** more. If trailing whitespace is ignored, the "patch" command gets
72 ** confused by the diff output. Ticket [a9f7b23c2e376af5b0e5b]
73 **
74 ** Return 0 if the file is binary or contains a line that is
75 ** too long.
76 */
77 static DLine *break_into_lines(const char *z, int n, int *pnLine, int ignoreWS){
78 int nLine, i, j, k, x;
79 unsigned int h, h2;
80 DLine *a;
81
82 /* Count the number of lines. Allocate space to hold
83 ** the returned array.
84 */
85 for(i=j=0, nLine=1; i<n; i++, j++){
86 int c = z[i];
87 if( c==0 ){
88 return 0;
89 }
90 if( c=='\n' && z[i+1]!=0 ){
91 nLine++;
92 if( j>LENGTH_MASK ){
93 return 0;
94 }
95 j = 0;
96 }
97 }
98 if( j>LENGTH_MASK ){
99 return 0;
100 }
101 a = fossil_malloc( nLine*sizeof(a[0]) );
102 memset(a, 0, nLine*sizeof(a[0]) );
103 if( n==0 ){
104 *pnLine = 0;
105 return a;
106 }
107
108 /* Fill in the array */
109 for(i=0; i<nLine; i++){
110 a[i].z = z;
111 for(j=0; z[j] && z[j]!='\n'; j++){}
112 k = j;
113 while( ignoreWS && k>0 && fossil_isspace(z[k-1]) ){ k--; }
114 for(h=0, x=0; x<k; x++){
115 h = h ^ (h<<2) ^ z[x];
116 }
117 a[i].h = h = (h<<LENGTH_MASK_SZ) | k;;
118 h2 = h % nLine;
119 a[i].iNext = a[h2].iHash;
120 a[h2].iHash = i+1;
121 z += j+1;
122 }
123
124 /* Return results */
125 *pnLine = nLine;
126 return a;
127 }
128
129 /*
130 ** Return true if two DLine elements are identical.
131 */
132 static int same_dline(DLine *pA, DLine *pB){
133 return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
134 }
135
136 /*
137 ** Append a single line of "diff" output to pOut.
138 */
139 static void appendDiffLine(Blob *pOut, char *zPrefix, DLine *pLine){
140 blob_append(pOut, zPrefix, 1);
141 blob_append(pOut, pLine->z, pLine->h & LENGTH_MASK);
142 blob_append(pOut, "\n", 1);
143 }
144
145 /*
146 ** Expand the size of aEdit[] array to hold nEdit elements.
147 */
148 static void expandEdit(DContext *p, int nEdit){
149 p->aEdit = fossil_realloc(p->aEdit, nEdit*sizeof(int));
150 p->nEditAlloc = nEdit;
151 }
152
153 /*
154 ** Append a new COPY/DELETE/INSERT triple.
155 */
156 static void appendTriple(DContext *p, int nCopy, int nDel, int nIns){
157 /* printf("APPEND %d/%d/%d\n", nCopy, nDel, nIns); */
158 if( p->nEdit>=3 ){
159 if( p->aEdit[p->nEdit-1]==0 ){
160 if( p->aEdit[p->nEdit-2]==0 ){
161 p->aEdit[p->nEdit-3] += nCopy;
162 p->aEdit[p->nEdit-2] += nDel;
163 p->aEdit[p->nEdit-1] += nIns;
164 return;
165 }
166 if( nCopy==0 ){
167 p->aEdit[p->nEdit-2] += nDel;
168 p->aEdit[p->nEdit-1] += nIns;
169 return;
170 }
171 }
172 if( nCopy==0 && nDel==0 ){
173 p->aEdit[p->nEdit-1] += nIns;
174 return;
175 }
176 }
177 if( p->nEdit+3>p->nEditAlloc ){
178 expandEdit(p, p->nEdit*2 + 15);
179 if( p->aEdit==0 ) return;
180 }
181 p->aEdit[p->nEdit++] = nCopy;
182 p->aEdit[p->nEdit++] = nDel;
183 p->aEdit[p->nEdit++] = nIns;
184 }
185
186
187 /*
188 ** Given a diff context in which the aEdit[] array has been filled
189 ** in, compute a context diff into pOut.
190 */
191 static void contextDiff(DContext *p, Blob *pOut, int nContext){
192 DLine *A; /* Left side of the diff */
193 DLine *B; /* Right side of the diff */
194 int a = 0; /* Index of next line in A[] */
195 int b = 0; /* Index of next line in B[] */
196 int *R; /* Array of COPY/DELETE/INSERT triples */
197 int r; /* Index into R[] */
198 int nr; /* Number of COPY/DELETE/INSERT triples to process */
199 int mxr; /* Maximum value for r */
200 int na, nb; /* Number of lines shown from A and B */
201 int i, j; /* Loop counters */
202 int m; /* Number of lines to output */
203 int skip; /* Number of lines to skip */
204
205 A = p->aFrom;
206 B = p->aTo;
207 R = p->aEdit;
208 mxr = p->nEdit;
209 while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
210 for(r=0; r<mxr; r += 3*nr){
211 /* Figure out how many triples to show in a single block */
212 for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
213 /* printf("r=%d nr=%d\n", r, nr); */
214
215 /* For the current block comprising nr triples, figure out
216 ** how many lines of A and B are to be displayed
217 */
218 if( R[r]>nContext ){
219 na = nb = nContext;
220 skip = R[r] - nContext;
221 }else{
222 na = nb = R[r];
223 skip = 0;
224 }
225 for(i=0; i<nr; i++){
226 na += R[r+i*3+1];
227 nb += R[r+i*3+2];
228 }
229 if( R[r+nr*3]>nContext ){
230 na += nContext;
231 nb += nContext;
232 }else{
233 na += R[r+nr*3];
234 nb += R[r+nr*3];
235 }
236 for(i=1; i<nr; i++){
237 na += R[r+i*3];
238 nb += R[r+i*3];
239 }
240 /*
241 * If the patch changes an empty file or results in an empty file,
242 * the block header must use 0,0 as position indicator and not 1,0.
243 * Otherwise, patch would be confused and may reject the diff.
244 */
245 blob_appendf(pOut,"@@ -%d,%d +%d,%d @@\n",
246 na ? a+skip+1 : 0, na,
247 nb ? b+skip+1 : 0, nb);
248
249 /* Show the initial common area */
250 a += skip;
251 b += skip;
252 m = R[r] - skip;
253 for(j=0; j<m; j++){
254 appendDiffLine(pOut, " ", &A[a+j]);
255 }
256 a += m;
257 b += m;
258
259 /* Show the differences */
260 for(i=0; i<nr; i++){
261 m = R[r+i*3+1];
262 for(j=0; j<m; j++){
263 appendDiffLine(pOut, "-", &A[a+j]);
264 }
265 a += m;
266 m = R[r+i*3+2];
267 for(j=0; j<m; j++){
268 appendDiffLine(pOut, "+", &B[b+j]);
269 }
270 b += m;
271 if( i<nr-1 ){
272 m = R[r+i*3+3];
273 for(j=0; j<m; j++){
274 appendDiffLine(pOut, " ", &B[b+j]);
275 }
276 b += m;
277 a += m;
278 }
279 }
280
281 /* Show the final common area */
282 assert( nr==i );
283 m = R[r+nr*3];
284 if( m>nContext ) m = nContext;
285 for(j=0; j<m; j++){
286 appendDiffLine(pOut, " ", &B[b+j]);
287 }
288 }
289 }
290
291 /*
292 ** Compute the optimal longest common subsequence (LCS) using an
293 ** exhaustive search. This version of the LCS is only used for
294 ** shorter input strings since runtime is O(N*N) where N is the
295 ** input string length.
296 */
297 static void optimalLCS(
298 DContext *p, /* Two files being compared */
299 int iS1, int iE1, /* Range of lines in p->aFrom[] */
300 int iS2, int iE2, /* Range of lines in p->aTo[] */
301 int *piSX, int *piEX, /* Write p->aFrom[] common segment here */
302 int *piSY, int *piEY /* Write p->aTo[] common segment here */
303 ){
304 int mxLength = 0; /* Length of longest common subsequence */
305 int i, j; /* Loop counters */
306 int k; /* Length of a candidate subsequence */
307 int iSXb = iS1; /* Best match so far */
308 int iSYb = iS2; /* Best match so far */
309
310 for(i=iS1; i<iE1-mxLength; i++){
311 for(j=iS2; j<iE2-mxLength; j++){
312 if( !same_dline(&p->aFrom[i], &p->aTo[j]) ) continue;
313 if( mxLength && !same_dline(&p->aFrom[i+mxLength], &p->aTo[j+mxLength]) ){
314 continue;
315 }
316 k = 1;
317 while( i+k<iE1 && j+k<iE2 && same_dline(&p->aFrom[i+k],&p->aTo[j+k]) ){
318 k++;
319 }
320 if( k>mxLength ){
321 iSXb = i;
322 iSYb = j;
323 mxLength = k;
324 }
325 }
326 }
327 *piSX = iSXb;
328 *piEX = iSXb + mxLength;
329 *piSY = iSYb;
330 *piEY = iSYb + mxLength;
331 }
332
333 /*
334 ** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[]
335 ** file and lines iS2 through iE2-1 of the aTo[] file. Locate a sequence
336 ** of lines in these two blocks that are exactly the same. Return
337 ** the bounds of the matching sequence.
338 **
339 ** If there are two or more possible answers of the same length, the
340 ** returned sequence should be the one closest to the center of the
341 ** input range.
342 **
343 ** Ideally, the common sequence should be the longest possible common
344 ** sequence. However, an exact computation of LCS is O(N*N) which is
345 ** way too slow for larger files. So this routine uses an O(N)
346 ** heuristic approximation based on hashing that usually works about
347 ** as well. But if the O(N) algorithm doesn't get a good solution
348 ** and N is not too large, we fall back to an exact solution by
349 ** calling optimalLCS().
350 */
351 static void longestCommonSequence(
352 DContext *p, /* Two files being compared */
353 int iS1, int iE1, /* Range of lines in p->aFrom[] */
354 int iS2, int iE2, /* Range of lines in p->aTo[] */
355 int *piSX, int *piEX, /* Write p->aFrom[] common segment here */
356 int *piSY, int *piEY /* Write p->aTo[] common segment here */
357 ){
358 double bestScore = -1e30; /* Best score seen so far */
359 int i, j; /* Loop counters */
360 int iSX, iSY, iEX, iEY; /* Current match */
361 double score; /* Current score */
362 int skew; /* How lopsided is the match */
363 int dist; /* Distance of match from center */
364 int mid; /* Center of the span */
365 int iSXb, iSYb, iEXb, iEYb; /* Best match so far */
366 int iSXp, iSYp, iEXp, iEYp; /* Previous match */
367
368
369 iSXb = iSXp = iS1;
370 iEXb = iEXp = iS1;
371 iSYb = iSYp = iS2;
372 iEYb = iEYp = iS2;
373 mid = (iE1 + iS1)/2;
374 for(i=iS1; i<iE1; i++){
375 int limit = 0;
376 j = p->aTo[p->aFrom[i].h % p->nTo].iHash;
377 while( j>0
378 && (j-1<iS2 || j>=iE2 || !same_dline(&p->aFrom[i], &p->aTo[j-1]))
379 ){
380 if( limit++ > 10 ){
381 j = 0;
382 break;
383 }
384 j = p->aTo[j-1].iNext;
385 }
386 if( j==0 ) continue;
387 assert( i>=iSXb && i>=iSXp );
388 if( i<iEXb && j>=iSYb && j<iEYb ) continue;
389 if( i<iEXp && j>=iSYp && j<iEYp ) continue;
390 iSX = i;
391 iSY = j-1;
392 while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){
393 iSX--;
394 iSY--;
395 }
396 iEX = i+1;
397 iEY = j;
398 while( iEX<iE1 && iEY<iE2 && same_dline(&p->aFrom[iEX],&p->aTo[iEY]) ){
399 iEX++;
400 iEY++;
401 }
402 skew = (iSX-iS1) - (iSY-iS2);
403 if( skew<0 ) skew = -skew;
404 dist = (iSX+iEX)/2 - mid;
405 if( dist<0 ) dist = -dist;
406 score = (iEX - iSX) - 0.05*skew - 0.05*dist;
407 if( score>bestScore ){
408 bestScore = score;
409 iSXb = iSX;
410 iSYb = iSY;
411 iEXb = iEX;
412 iEYb = iEY;
413 }else{
414 iSXp = iSX;
415 iSYp = iSY;
416 iEXp = iEX;
417 iEYp = iEY;
418 }
419 }
420 if( iSXb==iEXb && (iE1-iS1)*(iE2-iS2)<400 ){
421 /* If no common sequence is found using the hashing heuristic and
422 ** the input is not too big, use the expensive exact solution */
423 optimalLCS(p, iS1, iE1, iS2, iE2, piSX, piEX, piSY, piEY);
424 }else{
425 *piSX = iSXb;
426 *piSY = iSYb;
427 *piEX = iEXb;
428 *piEY = iEYb;
429 }
430 /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n",
431 iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */
432 }
433
434 /*
435 ** Do a single step in the difference. Compute a sequence of
436 ** copy/delete/insert steps that will convert lines iS1 through iE1-1 of
437 ** the input into lines iS2 through iE2-1 of the output and write
438 ** that sequence into the difference context.
439 **
440 ** The algorithm is to find a block of common text near the middle
441 ** of the two segments being diffed. Then recursively compute
442 ** differences on the blocks before and after that common segment.
443 ** Special cases apply if either input segment is empty or if the
444 ** two segments have no text in common.
445 */
446 static void diff_step(DContext *p, int iS1, int iE1, int iS2, int iE2){
447 int iSX, iEX, iSY, iEY;
448
449 if( iE1<=iS1 ){
450 /* The first segment is empty */
451 if( iE2>iS2 ){
452 appendTriple(p, 0, 0, iE2-iS2);
453 }
454 return;
455 }
456 if( iE2<=iS2 ){
457 /* The second segment is empty */
458 appendTriple(p, 0, iE1-iS1, 0);
459 return;
460 }
461
462 /* Find the longest matching segment between the two sequences */
463 longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY);
464
465 if( iEX>iSX ){
466 /* A common segement has been found.
467 ** Recursively diff either side of the matching segment */
468 diff_step(p, iS1, iSX, iS2, iSY);
469 if( iEX>iSX ){
470 appendTriple(p, iEX - iSX, 0, 0);
471 }
472 diff_step(p, iEX, iE1, iEY, iE2);
473 }else{
474 /* The two segments have nothing in common. Delete the first then
475 ** insert the second. */
476 appendTriple(p, 0, iE1-iS1, iE2-iS2);
477 }
478 }
479
480 /*
481 ** Compute the differences between two files already loaded into
482 ** the DContext structure.
483 **
484 ** A divide and conquer technique is used. We look for a large
485 ** block of common text that is in the middle of both files. Then
486 ** compute the difference on those parts of the file before and
487 ** after the common block. This technique is fast, but it does
488 ** not necessarily generate the minimum difference set. On the
489 ** other hand, we do not need a minimum difference set, only one
490 ** that makes sense to human readers, which this algorithm does.
491 **
492 ** Any common text at the beginning and end of the two files is
493 ** removed before starting the divide-and-conquer algorithm.
494 */
495 static void diff_all(DContext *p){
496 int mnE, iS, iE1, iE2;
497
498 /* Carve off the common header and footer */
499 iE1 = p->nFrom;
500 iE2 = p->nTo;
501 while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){
502 iE1--;
503 iE2--;
504 }
505 mnE = iE1<iE2 ? iE1 : iE2;
506 for(iS=0; iS<mnE && same_dline(&p->aFrom[iS],&p->aTo[iS]); iS++){}
507
508 /* do the difference */
509 if( iS>0 ){
510 appendTriple(p, iS, 0, 0);
511 }
512 diff_step(p, iS, iE1, iS, iE2);
513 if( iE1<p->nFrom ){
514 appendTriple(p, p->nFrom - iE1, 0, 0);
515 }
516
517 /* Terminate the COPY/DELETE/INSERT triples with three zeros */
518 expandEdit(p, p->nEdit+3);
519 if( p->aEdit ){
520 p->aEdit[p->nEdit++] = 0;
521 p->aEdit[p->nEdit++] = 0;
522 p->aEdit[p->nEdit++] = 0;
523 }
524 }
525
526 /*
527 ** Generate a report of the differences between files pA and pB.
528 ** If pOut is not NULL then a unified diff is appended there. It
529 ** is assumed that pOut has already been initialized. If pOut is
530 ** NULL, then a pointer to an array of integers is returned.
531 ** The integers come in triples. For each triple,
532 ** the elements are the number of lines copied, the number of
533 ** lines deleted, and the number of lines inserted. The vector
534 ** is terminated by a triple of all zeros.
535 **
536 ** This diff utility does not work on binary files. If a binary
537 ** file is encountered, 0 is returned and pOut is written with
538 ** text "cannot compute difference between binary files".
539 */
540 int *text_diff(
541 Blob *pA_Blob, /* FROM file */
542 Blob *pB_Blob, /* TO file */
543 Blob *pOut, /* Write unified diff here if not NULL */
544 int nContext, /* Amount of context to unified diff */
545 int ignoreEolWs /* Ignore whitespace at the end of lines */
546 ){
547 DContext c;
548
549 /* Prepare the input files */
550 memset(&c, 0, sizeof(c));
551 c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
552 &c.nFrom, ignoreEolWs);
553 c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
554 &c.nTo, ignoreEolWs);
555 if( c.aFrom==0 || c.aTo==0 ){
556 free(c.aFrom);
557 free(c.aTo);
558 if( pOut ){
559 blob_appendf(pOut, "cannot compute difference between binary files\n");
560 }
561 return 0;
562 }
563
564 /* Compute the difference */
565 diff_all(&c);
566
567 if( pOut ){
568 /* Compute a context diff if requested */
569 contextDiff(&c, pOut, nContext);
570 free(c.aFrom);
571 free(c.aTo);
572 free(c.aEdit);
573 return 0;
574 }else{
575 /* If a context diff is not requested, then return the
576 ** array of COPY/DELETE/INSERT triples.
577 */
578 free(c.aFrom);
579 free(c.aTo);
580 return c.aEdit;
581 }
582 }
583
584 /*
585 ** COMMAND: test-rawdiff
586 */
587 void test_rawdiff_cmd(void){
588 Blob a, b;
589 int r;
590 int i;
591 int *R;
592 if( g.argc<4 ) usage("FILE1 FILE2 ...");
593 blob_read_from_file(&a, g.argv[2]);
594 for(i=3; i<g.argc; i++){
595 if( i>3 ) printf("-------------------------------\n");
596 blob_read_from_file(&b, g.argv[i]);
597 R = text_diff(&a, &b, 0, 0, 0);
598 for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
599 printf(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
600 }
601 /* free(R); */
602 blob_reset(&b);
603 }
604 }
605
606 /*
607 ** COMMAND: test-udiff
608 */
609 void test_udiff_cmd(void){
610 Blob a, b, out;
611 if( g.argc!=4 ) usage("FILE1 FILE2");
612 blob_read_from_file(&a, g.argv[2]);
613 blob_read_from_file(&b, g.argv[3]);
614 blob_zero(&out);
615 text_diff(&a, &b, &out, 3, 0);
616 blob_write_to_file(&out, "-");
617 }
618
619 /**************************************************************************
620 ** The basic difference engine is above. What follows is the annotation
621 ** engine. Both are in the same file since they share many components.
622 */
623
624 /*
625 ** The status of an annotation operation is recorded by an instance
626 ** of the following structure.
627 */
628 typedef struct Annotator Annotator;
629 struct Annotator {
630 DContext c; /* The diff-engine context */
631 struct { /* Lines of the original files... */
632 const char *z; /* The text of the line */
633 int n; /* Number of bytes (omitting trailing space and \n) */
634 const char *zSrc; /* Tag showing origin of this line */
635 } *aOrig;
636 int nOrig; /* Number of elements in aOrig[] */
637 int nNoSrc; /* Number of entries where aOrig[].zSrc==NULL */
638 };
639
640 /*
641 ** Initialize the annotation process by specifying the file that is
642 ** to be annotated. The annotator takes control of the input Blob and
643 ** will release it when it is finished with it.
644 */
645 static int annotation_start(Annotator *p, Blob *pInput){
646 int i;
647
648 memset(p, 0, sizeof(*p));
649 p->c.aTo = break_into_lines(blob_str(pInput), blob_size(pInput),&p->c.nTo,1);
650 if( p->c.aTo==0 ){
651 return 1;
652 }
653 p->aOrig = fossil_malloc( sizeof(p->aOrig[0])*p->c.nTo );
654 for(i=0; i<p->c.nTo; i++){
655 p->aOrig[i].z = p->c.aTo[i].z;
656 p->aOrig[i].n = p->c.aTo[i].h & LENGTH_MASK;
657 p->aOrig[i].zSrc = 0;
658 }
659 p->nOrig = p->c.nTo;
660 return 0;
661 }
662
663 /*
664 ** The input pParent is the next most recent ancestor of the file
665 ** being annotated. Do another step of the annotation. Return true
666 ** if additional annotation is required. zPName is the tag to insert
667 ** on each line of the file being annotated that was contributed by
668 ** pParent. Memory to hold zPName is leaked.
669 */
670 static int annotation_step(Annotator *p, Blob *pParent, char *zPName){
671 int i, j;
672 int lnTo;
673
674 /* Prepare the parent file to be diffed */
675 p->c.aFrom = break_into_lines(blob_str(pParent), blob_size(pParent),
676 &p->c.nFrom, 1);
677 if( p->c.aFrom==0 ){
678 return 1;
679 }
680
681 /* Compute the differences going from pParent to the file being
682 ** annotated. */
683 diff_all(&p->c);
684
685 /* Where new lines are inserted on this difference, record the
686 ** zPName as the source of the new line.
687 */
688 for(i=lnTo=0; i<p->c.nEdit; i+=3){
689 for(j=0; j<p->c.aEdit[i]; j++, lnTo++){
690 p->aOrig[lnTo].zSrc = zPName;
691 }
692 lnTo += p->c.aEdit[i+2];
693 }
694
695 /* Clear out the diff results */
696 free(p->c.aEdit);
697 p->c.aEdit = 0;
698 p->c.nEdit = 0;
699 p->c.nEditAlloc = 0;
700
701 /* Clear out the from file */
702 free(p->c.aFrom);
703 blob_zero(pParent);
704
705 /* Return no errors */
706 return 0;
707 }
708
709
710 /*
711 ** COMMAND: test-annotate-step
712 */
713 void test_annotate_step_cmd(void){
714 Blob orig, b;
715 Annotator x;
716 int i;
717
718 if( g.argc<4 ) usage("RID1 RID2 ...");
719 db_must_be_within_tree();
720 blob_zero(&b);
721 content_get(name_to_rid(g.argv[2]), &orig);
722 if( annotation_start(&x, &orig) ){
723 fossil_fatal("binary file");
724 }
725 for(i=3; i<g.argc; i++){
726 blob_zero(&b);
727 content_get(name_to_rid(g.argv[i]), &b);
728 if( annotation_step(&x, &b, g.argv[i-1]) ){
729 fossil_fatal("binary file");
730 }
731 }
732 for(i=0; i<x.nOrig; i++){
733 const char *zSrc = x.aOrig[i].zSrc;
734 if( zSrc==0 ) zSrc = g.argv[g.argc-1];
735 printf("%10s: %.*s\n", zSrc, x.aOrig[i].n, x.aOrig[i].z);
736 }
737 }
738
739 /*
740 ** Compute a complete annotation on a file. The file is identified
741 ** by its filename number (filename.fnid) and the baseline in which
742 ** it was checked in (mlink.mid).
743 */
744 static void annotate_file(Annotator *p, int fnid, int mid, int webLabel){
745 Blob toAnnotate; /* Text of the final version of the file */
746 Blob step; /* Text of previous revision */
747 int rid; /* Artifact ID of the file being annotated */
748 char *zLabel; /* Label to apply to a line */
749 Stmt q; /* Query returning all ancestor versions */
750
751 /* Initialize the annotation */
752 rid = db_int(0, "SELECT fid FROM mlink WHERE mid=%d AND fnid=%d",mid,fnid);
753 if( rid==0 ){
754 fossil_panic("file #%d is unchanged in manifest #%d", fnid, mid);
755 }
756 if( !content_get(rid, &toAnnotate) ){
757 fossil_panic("unable to retrieve content of artifact #%d", rid);
758 }
759 db_multi_exec("CREATE TEMP TABLE ok(rid INTEGER PRIMARY KEY)");
760 compute_ancestors(mid, 1000000000);
761 annotation_start(p, &toAnnotate);
762
763 db_prepare(&q,
764 "SELECT mlink.fid, blob.uuid, date(event.mtime), "
765 " coalesce(event.euser,event.user) "
766 " FROM mlink, blob, event"
767 " WHERE mlink.fnid=%d"
768 " AND mlink.mid IN ok"
769 " AND blob.rid=mlink.mid"
770 " AND event.objid=mlink.mid"
771 " ORDER BY event.mtime DESC",
772 fnid
773 );
774 while( db_step(&q)==SQLITE_ROW ){
775 int pid = db_column_int(&q, 0);
776 const char *zUuid = db_column_text(&q, 1);
777 const char *zDate = db_column_text(&q, 2);
778 const char *zUser = db_column_text(&q, 3);
779 if( webLabel ){
780 zLabel = mprintf(
781 "<a href='%s/info/%s' target='infowindow'>%.10s</a> %s %9.9s",
782 g.zTop, zUuid, zUuid, zDate, zUser
783 );
784 }else{
785 zLabel = mprintf("%.10s %s %9.9s", zUuid, zDate, zUser);
786 }
787 content_get(pid, &step);
788 annotation_step(p, &step, zLabel);
789 blob_reset(&step);
790 }
791 db_finalize(&q);
792 }
793
794 /*
795 ** WEBPAGE: annotate
796 **
797 ** Query parameters:
798 **
799 ** checkin=ID The manifest ID at which to start the annotation
800 ** filename=FILENAME The filename.
801 */
802 void annotation_page(void){
803 int mid;
804 int fnid;
805 int i;
806 Annotator ann;
807
808 login_check_credentials();
809 if( !g.okRead ){ login_needed(); return; }
810 mid = name_to_rid(PD("checkin","0"));
811 fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", P("filename"));
812 if( mid==0 || fnid==0 ){ fossil_redirect_home(); }
813 if( !db_exists("SELECT 1 FROM mlink WHERE mid=%d AND fnid=%d",mid,fnid) ){
814 fossil_redirect_home();
815 }
816 style_header("File Annotation");
817 annotate_file(&ann, fnid, mid, g.okHistory);
818 @ <pre>
819 for(i=0; i<ann.nOrig; i++){
820 ((char*)ann.aOrig[i].z)[ann.aOrig[i].n] = 0;
821 @ %s(ann.aOrig[i].zSrc): %h(ann.aOrig[i].z)
822 }
823 @ </pre>
824 style_footer();
825 }
826
827 /*
828 ** COMMAND: annotate
829 **
830 ** %fossil annotate FILENAME
831 **
832 ** Output the text of a file with markings to show when each line of
833 ** the file was last modified.
834 */
835 void annotate_cmd(void){
836 int fnid; /* Filename ID */
837 int fid; /* File instance ID */
838 int mid; /* Manifest where file was checked in */
839 Blob treename; /* FILENAME translated to canonical form */
840 char *zFilename; /* Cannonical filename */
841 Annotator ann; /* The annotation of the file */
842 int i; /* Loop counter */
843
844 db_must_be_within_tree();
845 if (g.argc<3) {
846 usage("FILENAME");
847 }
848 file_tree_name(g.argv[2], &treename, 1);
849 zFilename = blob_str(&treename);
850 fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", zFilename);
851 if( fnid==0 ){
852 fossil_fatal("no such file: %s", zFilename);
853 }
854 fid = db_int(0, "SELECT rid FROM vfile WHERE pathname=%Q", zFilename);
855 if( fid==0 ){
856 fossil_fatal("not part of current checkout: %s", zFilename);
857 }
858 mid = db_int(0, "SELECT mid FROM mlink WHERE fid=%d AND fnid=%d", fid, fnid);
859 if( mid==0 ){
860 fossil_panic("unable to find manifest");
861 }
862 annotate_file(&ann, fnid, mid, 0);
863 for(i=0; i<ann.nOrig; i++){
864 printf("%s: %.*s\n", ann.aOrig[i].zSrc, ann.aOrig[i].n, ann.aOrig[i].z);
865 }
866 }