Login
f-vdiff.c at [f998037762]
Login

File f-apps/f-vdiff.c artifact 6c0aa72ddd part of check-in f998037762


/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 
/* vim: set ts=2 et sw=2 tw=80: */
/*
   Copyright (c) 2013 D. Richard Hipp
  
   This program is free software; you can redistribute it and/or
   modify it under the terms of the Simplified BSD License (also
   known as the "2-Clause License" or "FreeBSD License".)
  
   This program is distributed in the hope that it will be useful,
   but without any warranty; without even the implied warranty of
   merchantability or fitness for a particular purpose.
  
   Author contact information:
     drh@hwaci.com
     http://www.hwaci.com/drh/
  
  *****************************************************************************
   This file implements a basic 'diff' for in-repo content
   (not yet filesystem-level).
*/

#include "fossil-scm/fossil-cli.h" /* Fossil App mini-framework */
#include <stdlib.h> /* strtol() */

static void fcli_local_help(){
  printf("Usage:\n\t%s [options] version1 version2 [filenames...]\n", fcli.appName);
  printf("\t%s [options] --from=version1 --to=version2 [filenames...]\n\n", fcli.appName);

  puts("\t--from|-v1=VERSION the first version. "
       "May optionally be provided as the first non-flag argument.\n");
  puts("\t--to|-v2=VERSION the second version. "
       "May optionally be provided as the second non-flag argument.\n");

  puts("\t--width|-w=INTEGER enables side-by-side diffs width the given width.\n");

  puts("\t--sbs is equivalent to --width=SOME_UNSPECIFIED_DEFAULT.\n");

  puts("\t--context|-n=INTEGER specifies the number of context lines.\n");

  puts("\t--no-color|-bw disables ANSI colorizing of output.\n");

  puts("\t--html generates an HTML-format diff.\n");

  puts("\t--invert|-i inverts the diffs (not the versions "
       "nor other displayed components, e.g. ordering of UUIDs).\n");

  puts("\t--brief|-b elides actual diffs and only summarizes the changes.\n");

  puts("\t--glob|-g=STRING only lists changes to filenames matching "
       "the given glob (or globs, comma-separated). There is probably a corner "
       "case or two involving renamed files, in particular when diffing "
       "against more than one version away from the original. "
       "This option may be provided multiple times.\n");

  puts("If neither --v1 nor --v2 are supplied (nor implied!) then it behaves as if "
       "it were passed \"--v1=current --v2=.\", where '.' is a symbolic "
       "name for the local checkout.\n");

  puts("All non-flag parameters, after flag processing is finished, are treated "
       "as filenames/globs and diffs are restricted to files matching those "
       "names/globs.\n");
}

/**
   Application-wide state: the diff configuration collected from the
   CLI flags in main() plus scratch buffers which are reused across
   files and released at the end of main().

   The initializer below is positional, so it must be kept in sync
   with the order of the members.
*/
static struct VDiffApp {
  /* Not referenced by any code visible in this file. NOTE(review):
     presumably superseded by the globs list - confirm before removing. */
  char const * glob;
  /* Number of context lines passed to fsl_diff_text(). Set by
     --context|-n, defaults to 5. */
  short contextLines;
  /* Side-by-side width passed to fsl_diff_text(). Set by --sbs (60)
     or --width|-w, defaults to 0. */
  short sbsWidth;
  /* Bitmask of FSL_DIFF_xxx flags passed to fsl_diff_text(). */
  int diffFlags;
  /* Non-0 if --brief|-b was given: f_vdiff_files() then prints a
     one-line DIFF summary instead of the diff itself. */
  char brief;
  /* Scratch buffer for the canonicalized filename in
     fsl_checkout_file_content(). */
  fsl_buffer fname;
  /* Scratch buffer for the first file's content in f_vdiff_files(). */
  fsl_buffer fcontent1;
  /* Scratch buffer for the second file's content in f_vdiff_files(). */
  fsl_buffer fcontent2;
  /* Scratch buffer which receives the SHA1 of a local checkout file
     in f_vdiff_files(). */
  fsl_buffer fhash;
  /* Globs collected from --glob|-g and from the trailing non-flag
     arguments. If non-empty, only matching filenames are processed. */
  fsl_list globs;
} VDiffApp = {
NULL/*glob*/,
5/*contextLines*/,
0/*sbsWidth*/,
0/*diffFlags*/,
0/*brief*/,
fsl_buffer_empty_m/*fname*/,
fsl_buffer_empty_m/*fcontent1*/,
fsl_buffer_empty_m/*fcontent2*/,
fsl_buffer_empty_m/*fhash*/,
fsl_list_empty_m/*globs*/
};


/**
   Reads the content of the file named zName into dest, replacing
   dest's previous content (dest->used is reset, its memory is
   reused).

   The name is canonicalized via fsl_file_canonical_name2(). If
   relativeToCwd is 0 then f's checkout directory name is passed to it
   as the root, otherwise NULL is passed (NOTE(review): presumably
   meaning "resolve relative to the current directory" - confirm
   against the library docs).

   Returns 0 on success, FSL_RC_MISUSE if any argument is NULL or
   zName is empty, FSL_RC_NOT_A_CHECKOUT if fsl_needs_checkout()
   reports failure, FSL_RC_MISUSE (with f's error state updated) if
   the canonical name ends with a slash, else the result of the
   failing library call.

   Uses VDiffApp.fname as scratch space and clears it before returning.
*/
int fsl_checkout_file_content(fsl_cx * f, char relativeToCwd, char const * zName, fsl_buffer * dest ){
  fsl_buffer * const canon = &VDiffApp.fname;
  char const * zRoot;
  int rc;

  if(!f || !zName || !*zName || !dest){
    return FSL_RC_MISUSE;
  }
  if(!fsl_needs_checkout(f)){
    return FSL_RC_NOT_A_CHECKOUT;
  }

  canon->used = 0;
  zRoot = relativeToCwd ? NULL : fsl_cx_checkout_dir_name(f, NULL);
  rc = fsl_file_canonical_name2( zRoot, zName, canon, 1 );
  if(0==rc){
    assert(canon->used);
    if('/'!=canon->mem[canon->used-1]){
      dest->used = 0;
      rc = fsl_buffer_fill_from_filename(dest, fsl_buffer_cstr(canon));
    }else{
      /* Without this check we might end up reading a directory entry
         in raw form. Well, we still might. */
      rc = fsl_cx_err_set(f, FSL_RC_MISUSE,"Filename may not have a trailing slash.");
    }
  }
  fsl_buffer_clear(canon);
  return rc;
}


/**
   Fetches the modification time(s) of the file described by fc.

   @param f The fossil context. Must not be NULL.
   @param vid The checkin version to use for the repo-side lookup. If
   it is 0 then it is replaced with the value reported by
   fsl_checkout_version_info().
   @param fc The F-card naming the file. Must not be NULL.
   @param repoMtime If not NULL, it is populated via
   fsl_mtime_of_manifest_file() for (vid, file).
   @param localMtime If not NULL, it is assigned the mtime reported by
   fsl_cx_stat() for fc->name.

   @return 0 on success. FSL_RC_MISUSE if f or fc are NULL. If the
   filename cannot be resolved to a repository filename ID, returns
   f's pending error code or, if there is none, FSL_RC_NOT_FOUND. If
   stat() fails, returns its result code. In the latter two cases f's
   error state is updated.
*/
int fsl_checkout_mtime(fsl_cx * f,
                       fsl_id_t vid,
                       fsl_card_F const * fc,
                       fsl_time_t * repoMtime,
                       fsl_time_t * localMtime){

  int rc = 0;
  fsl_id_t fid = 0;
  fsl_fstat fst = fsl_fstat_empty;
  if(!f || !fc) return FSL_RC_MISUSE;
  if(0==vid){
    fsl_checkout_version_info(f, &vid, NULL);
  }
  fid = fsl_repo_filename_fnid(f, fc->name);
  if(fid<=0){
    /* Covers both "lookup failed" (<0) and "no such name" (0): prefer
       an error already recorded in the context, else report
       not-found. */
    rc = fsl_cx_err_get(f, NULL, NULL);
    return rc ? rc : fsl_cx_err_set(f, FSL_RC_NOT_FOUND,
                                    "Could not resolve filename: %s",
                                    fc->name);
  }
  if(localMtime){
    rc = fsl_cx_stat(f, 0, fc->name, &fst);
    if(rc){
      return fsl_cx_err_set(f, rc, "Could not stat() file: %s",
                            fc->name);
    }
    *localMtime = fst.mtime;
  }
  if(repoMtime){
    rc = fsl_mtime_of_manifest_file(f, vid, fid, repoMtime);
  }

  return rc;
}

int f_vdiff_files(fsl_cx * f,
                  fsl_id_t vid1,
                  fsl_card_F const * fc1,
                  fsl_id_t vid2,
                  fsl_card_F const * fc2){
  int rc = 0;
  fsl_buffer * fContent1 = &VDiffApp.fcontent1;
  fsl_buffer * fContent2 = &VDiffApp.fcontent2;
  fsl_buffer * fhash = &VDiffApp.fhash;
  fsl_time_t rmtime = 0;
  fsl_time_t fmtime = 0;
  fsl_card_F const * fcHashCmp = NULL;
  if(vid1>0 && vid2>0 && !fsl_uuidcmp(fc1->uuid, fc2->uuid)){
    /* No diffs to check */
    return 0;
  }

  /**
     TODO: optimization: use vfile where we can to reduce the
     set of files we scan. This can only work if one version
     is the checked-out versoin and one is the local checkout
     changes.
  */

  /* Else different content in each version OR we have a local file
     and need to load it to see if it's changed. */
  fhash->used = fContent2->used = fContent1->used = 0;

  assert(vid1!=vid2);

  if(0==vid1){
    assert(0 != vid2);
    rc = fsl_checkout_file_content(f, 0, fc1->name, fContent1);
    if(!rc){
      rc = fsl_sha1sum_buffer(fContent1, fhash);
      if(!rc){
        fcHashCmp = fc2;
        rc = fsl_checkout_mtime(f, vid1, fc1, NULL, &fmtime);
      }
    }
  }else{
    rc = fsl_card_F_content(f, fc1, fContent1);
    if(!rc && (0==vid2)){
      /* Collect the repo-side mtime IF the other version==0. */
      rc = fsl_checkout_mtime(f, vid1, fc1, &rmtime, NULL);
    }
  }

  if(rc) return rc;

  /* Repeat for vid2. */
  if(0==vid2){
    assert(0 != vid1);
    rc = fsl_checkout_file_content(f, 0, fc2->name, fContent2);
    if(!rc){
      rc = fsl_sha1sum_buffer(fContent2, fhash);
      if(!rc){
        fcHashCmp = fc1;
        rc = fsl_checkout_mtime(f, vid2, fc2, NULL, &fmtime);
      }
    }
  }else{
    rc = fsl_card_F_content(f, fc2, fContent2);
    if(!rc && (0==vid1)){
      /* Collect the repo-side mtime IF the other version==0. */
      rc = fsl_checkout_mtime(f, vid2, fc2, &rmtime, NULL);
    }
  }

  if(rc) return rc;
  else if(fcHashCmp
          && (0==fsl_uuidcmp(fsl_buffer_cstr(fhash), fcHashCmp->uuid))
          ){
    /* repo-side content is unchanged from local copy. */
    assert(0==rc);
    return 0;
  }else if((fmtime>0) && (fmtime==rmtime)){
    /* One of the above is a local file and rmtime holds the repo-side
       mtime of the other. Assume naively that same time==same
       content, as that will be the case more often then not.
    */
    return 0;
  }else{
    if(VDiffApp.brief){
      char const * zUuid1 = (0==vid1) ? "local" : fc1->uuid;
      char const * zUuid2 = (0==vid2) ? "local" : fc2->uuid;
      fsl_outputf(f, "DIFF: %.8s ==> %.8s %s\n", zUuid1, zUuid2,
                  fc2->name);
    }else{
      /* fossil(1) only outputs Index if there is more than one file to diff
         and no files were explicitly named on the CLI. e.g.:

         fossil diff
         fossil diff foo bar

         will have different output even if foo and bar are the only
         changes in the repo.

         We don't really have enough info to know that here, but
         we can guess based on VDiffApp.globs.used. If it is set
         and we're here then this file matched a glob. i.e. a filename
         was provided, otherwise none was provided.
      */
      if(!VDiffApp.globs.used){
        fsl_outputf(f, "Index: %s\n%.67c\n", fc2->name, '=');
      }
      fsl_outputf(f, "--- %s\n+++ %s\n", fc1->name, fc2->name);
      rc = fsl_diff_text(fContent1, fContent2, fsl_output_f_fsl_cx, f,
                         VDiffApp.contextLines, VDiffApp.sbsWidth,
                         VDiffApp.diffFlags);
      if(rc){
        fcli_err_set(rc, "Error %s generating diff.", fsl_rc_cstr(rc));
      }else{
        f_out("\n") /* only for compat with fossil(1) */;
      }
    }
    return rc;
  }
}

/**
   Outputs a diff of the two given version RIDs. v1 is, for purposes of
   this algorithm, considered to be the older of the two.

   The F-cards of both versions are walked in parallel, in the order
   in which fsl_deck_F_next() delivers them (this algorithm relies on
   that being sorted by name). For each file in v2 which passes the
   VDiffApp.globs filter:

   - v1 entries sorting before it are reported as REMOVED.
   - If it has a prior name, a RENAMED line is emitted and the prior
     name is used for matching against v1.
   - If v1 has an entry of the same name, the pair is handed to
     f_vdiff_files().
   - Otherwise (and if it was not renamed) it is reported as ADDED.

   Any v1 entries left over after the last v2 entry are reported as
   REMOVED.

   It sends all output to f_out() and takes its diff-level
   configuration from the VDiffApp global.

   Returns 0 on success, else the result code of the failing
   operation.
*/
static int f_vdiff(fsl_id_t v1, fsl_id_t v2){
  int rc = 0;
  fsl_deck d1 = fsl_deck_empty;
  fsl_deck d2 = fsl_deck_empty;
  fsl_cx * f = fcli_cx();
  fsl_card_F const * fc1 = NULL;
  fsl_card_F const * fc2 = NULL;

  rc = fsl_deck_load_rid(f, &d1, v1, FSL_CATYPE_CHECKIN);
  if(rc) goto end;
  rc = fsl_deck_load_rid(f, &d2, v2, FSL_CATYPE_CHECKIN);
  if(rc) goto end;
  rc = fsl_deck_F_rewind(&d1);
  if(!rc) rc = fsl_deck_F_rewind(&d2);
  if(rc) goto end;

  /*
    Reminder: if v1==0 or v2==0, we need slightly different semantics.
    fsl_deck_load_rid() equates 0 to the current checkout, which is
    half right.  We actually want the content of the current local
    checkout for that case.

    TODO: optimization: if v1==checkout version and v2==local changes,
    filter our result set based on vfile entries which have marked
    changes. We will need fsl_vfile_changes_scan() for that, which is
    current marked internal but should be moved into the public
    API anyway.
  */

#define GLOBMATCH(FC) (!VDiffApp.globs.used ? 1 : !!fsl_glob_list_matches(&VDiffApp.globs, (FC)->name))

  fsl_deck_F_next(&d1, &fc1);
  for( fsl_deck_F_next(&d2, &fc2);
       fc2;
       fsl_deck_F_next(&d2, &fc2)){

    char const * zNameToCmp = fc2->priorName ? fc2->priorName : fc2->name;
    /* Only meaningful while fc1 is non-NULL: the result of comparing
       fc1->name against zNameToCmp. */
    int nameCmp = 0;
    if(!GLOBMATCH(fc2)) continue;
    while(fc1 && (0>(nameCmp = fsl_strcmp(fc1->name, zNameToCmp)))){
      /* v1 has files with lexically smaller names which v2 does not have. */
      if(GLOBMATCH(fc1)){
        f_out("REMOVED: %s\n", fc1->name);
      }
      fsl_deck_F_next(&d1, &fc1);
    }

    if(fc2->priorName){
      f_out("RENAMED: %s ==> %s\n", fc2->priorName, fc2->name);
    }

    if(fc1 && (0==nameCmp)){
      /* Same filename in both checkins */
      rc = f_vdiff_files(f, v1, fc1, v2, fc2);
      if(rc) goto end;
      fsl_deck_F_next(&d1, &fc1);
    }else if(!fc2->priorName){
      /* File was added between v1 and v2: either v1's list is
         exhausted or its next entry sorts after this name. The
         RENAME case was already noted above. */
      f_out("ADDED: %.8s %s\n", fc2->uuid, fc2->name);
    }
    assert((!fc1 || nameCmp>=0)
           && "The <0 case was handled by the while loop above!");
  }/*foreach (v2 F-card) loop*/

  /* v1 has files at the end of the list which v2 does not have (or
     v2 has no files at all). */
  while(fc1){
    if(GLOBMATCH(fc1)){
      f_out("REMOVED: %s\n", fc1->name);
    }
    fsl_deck_F_next(&d1, &fc1);
  }

#undef GLOBMATCH
  end:
  fsl_deck_finalize(&d1);
  fsl_deck_finalize(&d2);
  return rc;
}

enum {
  /* Largest magnitude accepted for short-valued CLI flags. This is
     the minimum value of SHRT_MAX guaranteed by the C standard, so
     any accepted value converts to short without loss. */
  F_VDIFF_SHORT_LIMIT = 32767
};

/**
   Parses zValue as a base-10 integer for the flag named zFlag (name
   given without leading dashes, used only for error messages).

   On success assigns the value to *pOut and returns 0. If zValue is
   NULL, empty, not entirely numeric, or outside of
   +/-F_VDIFF_SHORT_LIMIT, *pOut is not modified, the fcli error state
   is set and a non-0 result code is returned.
*/
static int f_vdiff_parse_short(char const * zFlag,
                               char const * zValue,
                               short * pOut){
  char * zEnd = NULL;
  long v;
  if(!zValue || !*zValue){
    return fcli_err_set(FSL_RC_MISUSE,
                        "The --%s flag requires an integer value.",
                        zFlag);
  }
  v = strtol(zValue, &zEnd, 10);
  /* On overflow strtol() returns LONG_MIN/LONG_MAX, which the range
     check rejects, so errno need not be consulted. */
  if(zEnd==zValue || *zEnd
     || v < -(long)F_VDIFF_SHORT_LIMIT
     || v > (long)F_VDIFF_SHORT_LIMIT){
    return fcli_err_set(FSL_RC_RANGE,
                        "Invalid integer value for --%s: %s",
                        zFlag, zValue);
  }
  *pOut = (short)v;
  return 0;
}

/**
   Entry point: processes the CLI flags (see fcli_local_help()),
   resolves the two versions to diff and runs f_vdiff() on them.

   The version alias "." refers to the local checkout and is
   represented internally as RID 0. If neither version is given the
   app diffs "current" against the local checkout.

   Flags are consumed before the positional version arguments, and
   any arguments left after those are treated as filename globs.
*/
int main(int argc, char * const * argv ){
  int rc = 0;
  char * vFrom = NULL;
  char * vTo = NULL;
  char * glob = NULL;
  char * tmpStr = NULL;
  fsl_cx * f;
  fsl_id_t idFrom = -1, idTo = -1;
  char const * checkoutAlias = ".";
  fcli.appHelp = fcli_local_help;
  rc = fcli_setup(argc, argv);
  if(FSL_RC_BREAK==rc) /* --help */ return 0;
  else if(rc) goto end;


  /* Set up/validate args... */
  f = fcli_cx();
  if(!fsl_cx_db_repo(f)){
    rc = fcli_err_set(FSL_RC_NOT_A_REPO,
                      "Requires a repository db. See --help.");
    goto end;
  }

  while(fcli_flag2("g","glob", &glob)){
    fsl_glob_list_parse(&VDiffApp.globs, glob);
    fsl_free(glob);
    glob = NULL;
  }

  /* Colorize only when writing to a terminal and not disabled. */
  if(fsl_isatty(1)
     && !fcli_flag2("bw", "no-color", NULL)){
    VDiffApp.diffFlags |= FSL_DIFF_ANSI_COLOR;
  }

  VDiffApp.brief = fcli_flag2("b", "brief", NULL);

  if(fcli_flag2("i", "invert", NULL)){
    VDiffApp.diffFlags |= FSL_DIFF_INVERT;
  }

  if(fcli_flag("html", NULL)){
    VDiffApp.diffFlags |= FSL_DIFF_HTML;
  }

  if(fcli_flag("sbs", NULL)){
    VDiffApp.sbsWidth = 60;
  }
  /* An explicit --width overrides the --sbs default. */
  if(fcli_flag2("w", "width", &tmpStr)){
    rc = f_vdiff_parse_short("width", tmpStr, &VDiffApp.sbsWidth);
    fsl_free(tmpStr);
    tmpStr = NULL;
    if(rc) goto end;
  }

  if(fcli_flag2("n", "context", &tmpStr)){
    rc = f_vdiff_parse_short("context", tmpStr, &VDiffApp.contextLines);
    fsl_free(tmpStr);
    tmpStr = NULL;
    if(rc) goto end;
  }

  fcli_flag_or_arg( "v1", "from", &vFrom);
  fcli_flag_or_arg( "v2", "to", &vTo);
  /* assert(!vFrom); */
  if(!vFrom && !vTo){
    /* Special case: compare current checkout repo version vs local copy. */
    vFrom = fsl_mprintf("current");
    idTo = 0;
  }else if(!vFrom || !vTo){
    rc = fcli_err_set(FSL_RC_MISUSE, "Both of -v1 UUID and -v2 UUID are required.");
    goto end;
  }


  if(0==fsl_strcmp(vFrom, checkoutAlias)) idFrom = 0;
  else rc = fsl_sym_to_rid(f, vFrom, FSL_CATYPE_CHECKIN, &idFrom);
  if(!rc && idTo<0){
    /* idTo is still unresolved, i.e. vTo was provided. */
    if(0==fsl_strcmp(vTo, checkoutAlias)) idTo = 0;
    else rc = fsl_sym_to_rid(f, vTo, FSL_CATYPE_CHECKIN, &idTo);
  }
  if(rc) goto end;
  else if(idFrom==idTo){
    rc = fcli_err_set(FSL_RC_RANGE,
                      "Cowardly refusing to diff a version "
                      "against itself.");
    goto end;
  }



  if(fcli_has_unused_flags(0)) goto end;

  /* All remaining arguments are filenames/globs. */
  while((glob = fcli_next_arg(1))){
    fsl_glob_list_parse(&VDiffApp.globs, glob);
    fsl_free(glob);
    glob = NULL;
  }

  if(!idTo || !idFrom){
    if(!fsl_cx_db_checkout(f)){
      rc = fcli_err_set(FSL_RC_NOT_A_CHECKOUT,
                        "Using the '.' (local checkout) version "
                        "alias requires a checkout.");
      goto end;
    }
  }

  assert(idFrom>=0);
  assert(idTo>=0);

  rc = f_vdiff( idFrom, idTo );


  end:
  fsl_free(vFrom);
  fsl_free(vTo);
  fsl_glob_list_clear(&VDiffApp.globs);
  fsl_buffer_clear(&VDiffApp.fname);
  fsl_buffer_clear(&VDiffApp.fcontent1);
  fsl_buffer_clear(&VDiffApp.fcontent2);
  fsl_buffer_clear(&VDiffApp.fhash);
  return fcli_end_of_main(rc);
}