/* ** Copyright (c) 2007 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the Simplified BSD License (also ** known as the "2-Clause License" or "FreeBSD License".) ** This program is distributed in the hope that it will be useful, ** but without any warranty; without even the implied warranty of ** merchantability or fitness for a particular purpose. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** This module implements a 3-way merge */ #include "config.h" #include "merge3.h" #if 0 #define DEBUG(X) X #define ISDEBUG 1 #else #define DEBUG(X) #define ISDEBUG 0 #endif /* The minimum of two integers */ #ifndef min # define min(A,B) (A<B?A:B) #endif /* ** Compare N lines of text from pV1 and pV2. If the lines ** are the same, return true. Return false if one or more of the N ** lines are different. ** ** The cursors on both pV1 and pV2 is unchanged by this comparison. */ static int sameLines(Blob *pV1, Blob *pV2, int N){ char *z1, *z2; int i; char c; if( N==0 ) return 1; z1 = &blob_buffer(pV1)[blob_tell(pV1)]; z2 = &blob_buffer(pV2)[blob_tell(pV2)]; for(i=0; (c=z1[i])==z2[i]; i++){ if( c=='\n' || c==0 ){ N--; if( N==0 || c==0 ) return 1; } } return 0; } /* ** Look at the next edit triple in both aC1 and aC2. (An "edit triple" is ** three integers describing the number of copies, deletes, and inserts in ** moving from the original to the edited copy of the file.) If the three ** integers of the edit triples describe an identical edit, then return 1. ** If the edits are different, return 0. */ static int sameEdit( int *aC1, /* Array of edit integers for file 1 */ int *aC2, /* Array of edit integers for file 2 */ Blob *pV1, /* Text of file 1 */ Blob *pV2 /* Text of file 2 */ ){ if( aC1[0]!=aC2[0] ) return 0; if( aC1[1]!=aC2[1] ) return 0; if( aC1[2]!=aC2[2] ) return 0; if( sameLines(pV1, pV2, aC1[2]) ) return 1; return 0; } /* ** The aC[] array contains triples of integers. Within each triple, the ** elements are: ** ** (0) The number of lines to copy ** (1) The number of lines to delete ** (2) The number of liens to insert ** ** Suppose we want to advance over sz lines of the original file. This routine ** returns true if that advance would land us on a copy operation. It ** returns false if the advance would end on a delete. */ static int ends_at_CPY(int *aC, int sz){ while( sz>0 && (aC[0]>0 || aC[1]>0 || aC[2]>0) ){ if( aC[0]>=sz ) return 1; sz -= aC[0]; if( aC[1]>sz ) return 0; sz -= aC[1]; aC += 3; } return 1; } /* ** pSrc contains an edited file where aC[] describes the edit. Part of ** pSrc has already been output. This routine outputs additional lines ** of pSrc - lines that correspond to the next sz lines of the original ** unedited file. ** ** Note that sz counts the number of lines of text in the original file. ** But text is output from the edited file. So the number of lines transfer ** to pOut might be different from sz. Fewer lines appear in pOut if there ** are deletes. More lines appear if there are inserts. ** ** The aC[] array is updated and the new index into aC[] is returned. */ static int output_one_side( Blob *pOut, /* Write to this blob */ Blob *pSrc, /* The edited file that is to be copied to pOut */ int *aC, /* Array of integer triples describing the edit */ int i, /* Index in aC[] of current location in pSrc */ int sz /* Number of lines in unedited source to output */ ){ while( sz>0 ){ if( aC[i]==0 && aC[i+1]==0 && aC[i+2]==0 ) break; if( aC[i]>=sz ){ blob_copy_lines(pOut, pSrc, sz); aC[i] -= sz; break; } blob_copy_lines(pOut, pSrc, aC[i]); blob_copy_lines(pOut, pSrc, aC[i+2]); sz -= aC[i] + aC[i+1]; i += 3; } return i; } /* ** Text of boundary markers for merge conflicts. */ static const char *const mergeMarker[] = { /*123456789 123456789 123456789 123456789 123456789 123456789 123456789*/ "<<<<<<< BEGIN MERGE CONFLICT: local copy shown first <<<<<<<<<<<<<<<", "||||||| COMMON ANCESTOR content follows ||||||||||||||||||||||||||||", "======= MERGED IN content follows ==================================", ">>>>>>> END MERGE CONFLICT >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" }; /* ** Return true if the input blob contains any CR/LF pairs on the first ** ten lines. This should be enough to detect files that use mainly CR/LF ** line endings without causing a performance impact for LF only files. */ int contains_crlf(Blob *p){ int i; int j = 0; const int maxL = 10; /* Max lines to check */ const char *z = blob_buffer(p); int n = blob_size(p)+1; for(i=1; i<n; ){ if( z[i-1]=='\r' && z[i]=='\n' ) return 1; while( i<n && z[i]!='\n' ){ i++; } j++; if( j>maxL ) return 0; } return 0; } /* ** Ensure that the text in pBlob ends with a new line. ** If useCrLf is true adds "\r\n" otherwise '\n'. */ void ensure_line_end(Blob *pBlob, int useCrLf){ if( pBlob->nUsed<=0 ) return; if( pBlob->aData[pBlob->nUsed-1]!='\n' ){ if( useCrLf ) blob_append_char(pBlob, '\r'); blob_append_char(pBlob, '\n'); } } /* ** Do a three-way merge. Initialize pOut to contain the result. ** ** The merge is an edit against pV2. Both pV1 and pV2 have a ** common origin at pPivot. Apply the changes of pPivot ==> pV1 ** to pV2. ** ** The return is 0 upon complete success. If any input file is binary, ** -1 is returned and pOut is unmodified. If there are merge ** conflicts, the merge proceeds as best as it can and the number ** of conflicts is returns */ static int blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){ int *aC1; /* Changes from pPivot to pV1 */ int *aC2; /* Changes from pPivot to pV2 */ int i1, i2; /* Index into aC1[] and aC2[] */ int nCpy, nDel, nIns; /* Number of lines to copy, delete, or insert */ int limit1, limit2; /* Sizes of aC1[] and aC2[] */ int nConflict = 0; /* Number of merge conflicts seen so far */ int useCrLf = 0; blob_zero(pOut); /* Merge results stored in pOut */ /* If both pV1 and pV2 start with a UTF-8 byte-order-mark (BOM), ** keep it in the output. This should be secure enough not to cause ** unintended changes to the merged file and consistent with what ** users are using in their source files. */ if( starts_with_utf8_bom(pV1, 0) && starts_with_utf8_bom(pV2, 0) ){ blob_append(pOut, (char*)get_utf8_bom(0), -1); } /* Check once to see if both pV1 and pV2 contains CR/LF endings. ** If true, CR/LF pair will be used later to append the ** boundary markers for merge conflicts. */ if( contains_crlf(pV1) && contains_crlf(pV2) ){ useCrLf = 1; } /* Compute the edits that occur from pPivot => pV1 (into aC1) ** and pPivot => pV2 (into aC2). Each of the aC1 and aC2 arrays is ** an array of integer triples. Within each triple, the first integer ** is the number of lines of text to copy directly from the pivot, ** the second integer is the number of lines of text to omit from the ** pivot, and the third integer is the number of lines of text that are ** inserted. The edit array ends with a triple of 0,0,0. */ aC1 = text_diff(pPivot, pV1, 0, 0, 0); aC2 = text_diff(pPivot, pV2, 0, 0, 0); if( aC1==0 || aC2==0 ){ free(aC1); free(aC2); return -1; } blob_rewind(pV1); /* Rewind inputs: Needed to reconstruct output */ blob_rewind(pV2); blob_rewind(pPivot); /* Determine the length of the aC1[] and aC2[] change vectors */ for(i1=0; aC1[i1] || aC1[i1+1] || aC1[i1+2]; i1+=3){} limit1 = i1; for(i2=0; aC2[i2] || aC2[i2+1] || aC2[i2+2]; i2+=3){} limit2 = i2; DEBUG( for(i1=0; i1<limit1; i1+=3){ printf("c1: %4d %4d %4d\n", aC1[i1], aC1[i1+1], aC1[i1+2]); } for(i2=0; i2<limit2; i2+=3){ printf("c2: %4d %4d %4d\n", aC2[i2], aC2[i2+1], aC2[i2+2]); } ) /* Loop over the two edit vectors and use them to compute merged text ** which is written into pOut. i1 and i2 are multiples of 3 which are ** indices into aC1[] and aC2[] to the edit triple currently being ** processed */ i1 = i2 = 0; while( i1<limit1 && i2<limit2 ){ DEBUG( printf("%d: %2d %2d %2d %d: %2d %2d %2d\n", i1/3, aC1[i1], aC1[i1+1], aC1[i1+2], i2/3, aC2[i2], aC2[i2+1], aC2[i2+2]); ) if( aC1[i1]>0 && aC2[i2]>0 ){ /* Output text that is unchanged in both V1 and V2 */ nCpy = min(aC1[i1], aC2[i2]); DEBUG( printf("COPY %d\n", nCpy); ) blob_copy_lines(pOut, pPivot, nCpy); blob_copy_lines(0, pV1, nCpy); blob_copy_lines(0, pV2, nCpy); aC1[i1] -= nCpy; aC2[i2] -= nCpy; }else if( aC1[i1] >= aC2[i2+1] && aC1[i1]>0 && aC2[i2+1]+aC2[i2+2]>0 ){ /* Output edits to V2 that occurs within unchanged regions of V1 */ nDel = aC2[i2+1]; nIns = aC2[i2+2]; DEBUG( printf("EDIT -%d+%d left\n", nDel, nIns); ) blob_copy_lines(0, pPivot, nDel); blob_copy_lines(0, pV1, nDel); blob_copy_lines(pOut, pV2, nIns); aC1[i1] -= nDel; i2 += 3; }else if( aC2[i2] >= aC1[i1+1] && aC2[i2]>0 && aC1[i1+1]+aC1[i1+2]>0 ){ /* Output edits to V1 that occur within unchanged regions of V2 */ nDel = aC1[i1+1]; nIns = aC1[i1+2]; DEBUG( printf("EDIT -%d+%d right\n", nDel, nIns); ) blob_copy_lines(0, pPivot, nDel); blob_copy_lines(0, pV2, nDel); blob_copy_lines(pOut, pV1, nIns); aC2[i2] -= nDel; i1 += 3; }else if( sameEdit(&aC1[i1], &aC2[i2], pV1, pV2) ){ /* Output edits that are identical in both V1 and V2. */ assert( aC1[i1]==0 ); nDel = aC1[i1+1]; nIns = aC1[i1+2]; DEBUG( printf("EDIT -%d+%d both\n", nDel, nIns); ) blob_copy_lines(0, pPivot, nDel); blob_copy_lines(pOut, pV1, nIns); blob_copy_lines(0, pV2, nIns); i1 += 3; i2 += 3; }else { /* We have found a region where different edits to V1 and V2 overlap. ** This is a merge conflict. Find the size of the conflict, then ** output both possible edits separated by distinctive marks. */ int sz = 1; /* Size of the conflict in lines */ nConflict++; while( !ends_at_CPY(&aC1[i1], sz) || !ends_at_CPY(&aC2[i2], sz) ){ sz++; } DEBUG( printf("CONFLICT %d\n", sz); ) blob_append(pOut, mergeMarker[0], -1); ensure_line_end(pOut, useCrLf); i1 = output_one_side(pOut, pV1, aC1, i1, sz); ensure_line_end(pOut, useCrLf); blob_append(pOut, mergeMarker[1], -1); ensure_line_end(pOut, useCrLf); blob_copy_lines(pOut, pPivot, sz); ensure_line_end(pOut, useCrLf); blob_append(pOut, mergeMarker[2], -1); ensure_line_end(pOut, useCrLf); i2 = output_one_side(pOut, pV2, aC2, i2, sz); ensure_line_end(pOut, useCrLf); blob_append(pOut, mergeMarker[3], -1); ensure_line_end(pOut, useCrLf); } /* If we are finished with an edit triple, advance to the next ** triple. */ if( i1<limit1 && aC1[i1]==0 && aC1[i1+1]==0 && aC1[i1+2]==0 ) i1+=3; if( i2<limit2 && aC2[i2]==0 && aC2[i2+1]==0 && aC2[i2+2]==0 ) i2+=3; } /* When one of the two edit vectors reaches its end, there might still ** be an insert in the other edit vector. Output this remaining ** insert. */ DEBUG( printf("%d: %2d %2d %2d %d: %2d %2d %2d\n", i1/3, aC1[i1], aC1[i1+1], aC1[i1+2], i2/3, aC2[i2], aC2[i2+1], aC2[i2+2]); ) if( i1<limit1 && aC1[i1+2]>0 ){ DEBUG( printf("INSERT +%d left\n", aC1[i1+2]); ) blob_copy_lines(pOut, pV1, aC1[i1+2]); }else if( i2<limit2 && aC2[i2+2]>0 ){ DEBUG( printf("INSERT +%d right\n", aC2[i2+2]); ) blob_copy_lines(pOut, pV2, aC2[i2+2]); } free(aC1); free(aC2); return nConflict; } /* ** Return true if the input string contains a merge marker on a line by ** itself. */ int contains_merge_marker(Blob *p){ int i, j; int len = (int)strlen(mergeMarker[0]); const char *z = blob_buffer(p); int n = blob_size(p) - len + 1; assert( len==(int)strlen(mergeMarker[1]) ); assert( len==(int)strlen(mergeMarker[2]) ); assert( len==(int)strlen(mergeMarker[3]) ); assert( count(mergeMarker)==4 ); for(i=0; i<n; ){ for(j=0; j<4; j++){ if( (memcmp(&z[i], mergeMarker[j], len)==0) && (i+1==n || z[i+len]=='\n' || z[i+len]=='\r') ) return 1; } while( i<n && z[i]!='\n' ){ i++; } while( i<n && (z[i]=='\n' || z[i]=='\r') ){ i++; } } return 0; } /* ** Return true if the named file contains an unresolved merge marker line. */ int file_contains_merge_marker(const char *zFullpath){ Blob file; int rc; blob_read_from_file(&file, zFullpath, ExtFILE); rc = contains_merge_marker(&file); blob_reset(&file); return rc; } /* ** COMMAND: 3-way-merge* ** ** Usage: %fossil 3-way-merge BASELINE V1 V2 MERGED ** ** Inputs are files BASELINE, V1, and V2. The file MERGED is generated ** as output. ** ** BASELINE is a common ancestor of two files V1 and V2 that have diverging ** edits. The generated output file MERGED is the combination of all ** changes in both V1 and V2. ** ** This command has no effect on the Fossil repository. It is a utility ** command made available for the convenience of users. This command can ** be used, for example, to help import changes from an upstream project. ** ** Suppose an upstream project has a file named "Xup.c" which is imported ** with modifications to the local project as "Xlocal.c". Suppose further ** that the "Xbase.c" is an exact copy of the last imported "Xup.c". ** Then to import the latest "Xup.c" while preserving all the local changes: ** ** fossil 3-way-merge Xbase.c Xlocal.c Xup.c Xlocal.c ** cp Xup.c Xbase.c ** # Verify that everything still works ** fossil commit ** */ void delta_3waymerge_cmd(void){ Blob pivot, v1, v2, merged; int nConflict; /* We should be done with options.. */ verify_all_options(); if( g.argc!=6 ){ usage("PIVOT V1 V2 MERGED"); } if( blob_read_from_file(&pivot, g.argv[2], ExtFILE)<0 ){ fossil_fatal("cannot read %s", g.argv[2]); } if( blob_read_from_file(&v1, g.argv[3], ExtFILE)<0 ){ fossil_fatal("cannot read %s", g.argv[3]); } if( blob_read_from_file(&v2, g.argv[4], ExtFILE)<0 ){ fossil_fatal("cannot read %s", g.argv[4]); } nConflict = blob_merge(&pivot, &v1, &v2, &merged); if( blob_write_to_file(&merged, g.argv[5])<blob_size(&merged) ){ fossil_fatal("cannot write %s", g.argv[4]); } blob_reset(&pivot); blob_reset(&v1); blob_reset(&v2); blob_reset(&merged); if( nConflict>0 ) fossil_warning("WARNING: %d merge conflicts", nConflict); } /* ** aSubst is an array of string pairs. The first element of each pair is ** a string that begins with %. The second element is a replacement for that ** string. ** ** This routine makes a copy of zInput into memory obtained from malloc and ** performance all applicable substitutions on that string. */ char *string_subst(const char *zInput, int nSubst, const char **azSubst){ Blob x; int i, j; blob_zero(&x); while( zInput[0] ){ for(i=0; zInput[i] && zInput[i]!='%'; i++){} if( i>0 ){ blob_append(&x, zInput, i); zInput += i; } if( zInput[0]==0 ) break; for(j=0; j<nSubst; j+=2){ int n = strlen(azSubst[j]); if( strncmp(zInput, azSubst[j], n)==0 ){ blob_append(&x, azSubst[j+1], -1); zInput += n; break; } } if( j>=nSubst ){ blob_append(&x, "%", 1); zInput++; } } return blob_str(&x); } #if INTERFACE /* ** Flags to the 3-way merger */ #define MERGE_DRYRUN 0x0001 /* ** The MERGE_KEEP_FILES flag specifies that merge_3way() should retain ** its temporary files on error. By default they are removed after the ** merge, regardless of success or failure. */ #define MERGE_KEEP_FILES 0x0002 #endif /* ** This routine is a wrapper around blob_merge() with the following ** enhancements: ** ** (1) If the merge-command is defined, then use the external merging ** program specified instead of the built-in blob-merge to do the ** merging. Panic if the external merger fails. ** ** Not currently implemented ** ** ** (2) If gmerge-command is defined and there are merge conflicts in ** blob_merge() then invoke the external graphical merger to resolve ** the conflicts. ** ** (3) If a merge conflict occurs and gmerge-command is not defined, ** then write the pivot, original, and merge-in files to the ** filesystem. */ int merge_3way( Blob *pPivot, /* Common ancestor (older) */ const char *zV1, /* Name of file for version merging into (mine) */ Blob *pV2, /* Version merging from (yours) */ Blob *pOut, /* Output written here */ unsigned mergeFlags /* Flags that control operation */ ){ Blob v1; /* Content of zV1 */ int rc; /* Return code of subroutines and this routine */ const char *zGMerge; /* Name of the gmerge command */ blob_read_from_file(&v1, zV1, ExtFILE); rc = blob_merge(pPivot, &v1, pV2, pOut); zGMerge = rc<=0 ? 0 : db_get("gmerge-command", 0); if( (mergeFlags & MERGE_DRYRUN)==0 && ((zGMerge!=0 && zGMerge[0]!=0) || (rc!=0 && (mergeFlags & MERGE_KEEP_FILES)!=0)) ){ char *zPivot; /* Name of the pivot file */ char *zOrig; /* Name of the original content file */ char *zOther; /* Name of the merge file */ zPivot = file_newname(zV1, "baseline", 1); blob_write_to_file(pPivot, zPivot); zOrig = file_newname(zV1, "original", 1); blob_write_to_file(&v1, zOrig); zOther = file_newname(zV1, "merge", 1); blob_write_to_file(pV2, zOther); if( rc>0 ){ if( zGMerge && zGMerge[0] ){ char *zOut; /* Temporary output file */ char *zCmd; /* Command to invoke */ const char *azSubst[8]; /* Strings to be substituted */ zOut = file_newname(zV1, "output", 1); azSubst[0] = "%baseline"; azSubst[1] = zPivot; azSubst[2] = "%original"; azSubst[3] = zOrig; azSubst[4] = "%merge"; azSubst[5] = zOther; azSubst[6] = "%output"; azSubst[7] = zOut; zCmd = string_subst(zGMerge, 8, azSubst); printf("%s\n", zCmd); fflush(stdout); fossil_system(zCmd); if( file_size(zOut, RepoFILE)>=0 ){ blob_read_from_file(pOut, zOut, ExtFILE); file_delete(zOut); } fossil_free(zCmd); fossil_free(zOut); } } if( (mergeFlags & MERGE_KEEP_FILES)==0 ){ file_delete(zPivot); file_delete(zOrig); file_delete(zOther); } fossil_free(zPivot); fossil_free(zOrig); fossil_free(zOther); } blob_reset(&v1); return rc; }