Fossil

Check-in [61ddd63b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Work toward making fossil work better on large repositories. This version implements a cache in the content manager. It is not clear yet if this is necessarily a good idea - this check-in might end up on an abandoned branch at some point.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 61ddd63b72f1436c9ad18b6bba9a754276ac50fc
User & Date: drh 2008-03-06 22:58:48
Context
2008-03-08
13:49
Add the "Compression Radio" line to the "stat" page. check-in: 9aaad3e7 user: drh tags: trunk
2008-03-06
22:58
Work toward making fossil work better on large repositories. This version implements a cache in the content manager. It is not clear yet if this is necessarily a good idea - this check-in might end up on an abandoned branch at some point. check-in: 61ddd63b user: drh tags: trunk
00:26
Fix problems on the stat page that appear for larger repositories. check-in: 80dc46d6 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/blob.c.

204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
}

/*
** Copy a blob
*/
void blob_copy(Blob *pTo, Blob *pFrom){
  blob_is_init(pFrom);
  blob_is_init(pTo);
  blob_zero(pTo);
  blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
}

/*
** Return a pointer to a null-terminated string for a blob.
*/







<







204
205
206
207
208
209
210

211
212
213
214
215
216
217
}

/*
** Copy a blob
*/
void blob_copy(Blob *pTo, Blob *pFrom){
  blob_is_init(pFrom);

  blob_zero(pTo);
  blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
}

/*
** Return a pointer to a null-terminated string for a blob.
*/

Changes to src/content.c.

22
23
24
25
26
27
28
29
30



















































31
32
33
34
35
36
37
























































38
39
40
41
42
43
44
45
46
47
48

49
50
51
52
53





























54

55
56
57
58
59
60
61
62
63
64
65

66
67
68
69
70
71
72
73
74
75
76
77



























78
79

80
81

82
83
84
85
86
87
88
89





90
91
92
93
94
95
96




97
98
99
100
101
102
103
...
192
193
194
195
196
197
198

199
200
201
202
203
204
205
...
247
248
249
250
251
252
253
254
255

256
257
258
259
260
261
262
...
273
274
275
276
277
278
279







280
281
282
283
284
285
286
*******************************************************************************
**
** Procedures store and retrieve records from the repository
*/
#include "config.h"
#include "content.h"
#include <assert.h>

/*



















































** Return the srcid associated with rid.  Or return 0 if rid is 
** original content and not a delta.
*/
static int findSrcid(int rid){
  int srcid = db_int(0, "SELECT srcid FROM delta WHERE rid=%d", rid);
  return srcid;
}

























































/*
** Extract the content for ID rid and put it into the
** uninitialized blob.  Return 1 on success.  If the record
** is a phantom, zero pBlob and return 0.
*/
int content_get(int rid, Blob *pBlob){
  Stmt q;
  Blob src;
  int srcid;
  int rc = 0;

  static Bag inProcess;

  assert( g.repositoryOpen );
  srcid = findSrcid(rid);
  blob_zero(pBlob);





























  if( srcid ){

    if( bag_find(&inProcess, srcid) ){
      db_multi_exec(
        "UPDATE blob SET content=NULL, size=-1 WHERE rid=%d;"
        "DELETE FROM delta WHERE rid=%d;"
        "INSERT OR IGNORE INTO phantom VALUES(%d);",
        srcid, srcid, srcid
      );
      blob_zero(pBlob);
      return 0;
    }
    bag_insert(&inProcess, srcid);

    if( content_get(srcid, &src) ){
      db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
      if( db_step(&q)==SQLITE_ROW ){
        Blob delta;
        db_ephemeral_blob(&q, 0, &delta);
        blob_uncompress(&delta, &delta);
        blob_init(pBlob,0,0);
        blob_delta_apply(&src, &delta, pBlob);
        blob_reset(&delta);
        rc = 1;
      }
      db_finalize(&q);



























      blob_reset(&src);
    }

    bag_remove(&inProcess, srcid);
  }else{

    db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
    if( db_step(&q)==SQLITE_ROW ){
      db_ephemeral_blob(&q, 0, pBlob);
      blob_uncompress(pBlob, pBlob);
      rc = 1;
    }
    db_finalize(&q);
  }





  return rc;
}

/*
** Get the contents of a file within a given revision.
*/
int content_get_historical_file(const char *revision, const char *file, Blob *content){




  Blob mfile;
  Manifest m;
  int i, rid=0;
  
  rid = name_to_rid(revision);
  content_get(rid, &mfile);
  
................................................................................
int content_put(Blob *pBlob, const char *zUuid, int srcId){
  int size;
  int rid;
  Stmt s1;
  Blob cmpr;
  Blob hash;
  int markAsUnclustered = 0;

  
  assert( g.repositoryOpen );
  if( pBlob && srcId==0 ){
    sha1sum_blob(pBlob, &hash);
  }else{
    blob_init(&hash, zUuid, -1);
  }
................................................................................
      "UPDATE blob SET rcvid=%d, size=%d, content=:data WHERE rid=%d",
       g.rcvid, size, rid
    );
    blob_compress(pBlob, &cmpr);
    db_bind_blob(&s1, ":data", &cmpr);
    db_exec(&s1);
    db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid);
    if( srcId==0 || db_int(0, "SELECT size FROM blob WHERE rid=%d", srcId)>0 ){
      after_dephantomize(rid, 0);

    }
  }else{
    /* We are creating a new entry */
    db_prepare(&s1,
      "INSERT INTO blob(rcvid,size,uuid,content)"
      "VALUES(%d,%d,'%b',:data)",
       g.rcvid, size, &hash
................................................................................
  }

  /* If the srcId is specified, then the data we just added is
  ** really a delta.  Record this fact in the delta table.
  */
  if( srcId ){
    db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId);







  }
  
  /* Add the element to the unclustered table if has never been
  ** previously seen.
  */
  if( markAsUnclustered ){
    db_multi_exec("INSERT OR IGNORE INTO unclustered VALUES(%d)", rid);









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>











>



<

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>











>












>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>


>








>
>
>
>
>




|

|
>
>
>
>







 







>







 







|
|
>







 







>
>
>
>
>
>
>







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159

160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
...
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
...
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
...
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
*******************************************************************************
**
** Procedures store and retrieve records from the repository
*/
#include "config.h"
#include "content.h"
#include <assert.h>

/*
** Macros for debugging
*/
#if 0
# define CONTENT_TRACE(X)  printf X;
#else
# define CONTENT_TRACE(X)
#endif

/*
** The artifact retrival cache
*/
#define MX_CACHE_CNT  50    /* Maximum number of positive cache entries */
#define EXPELL_INTERVAL 5   /* How often to expell from a full cache */
static struct {
  int n;               /* Current number of positive cache entries */
  int nextAge;         /* Age counter for implementing LRU */
  int skipCnt;         /* Used to limit entries expelled from cache */
  struct {             /* One instance of this for each cache entry */
    int rid;                  /* Artifact id */
    int age;                  /* Age.  Newer is larger */
    Blob content;             /* Content of the artifact */
  } a[MX_CACHE_CNT];   /* The positive cache */

  /*
  ** The missing artifact cache.
  **
  ** Artifacts whose record ID are in missingCache cannot be retrieved
  ** either because they are phantoms or because they are a delta that
  ** depends on a phantom.  Artifacts whose content we are certain is
  ** available are in availableCache.  If an artifact is in neither cache
  ** then its current availablity is unknown.
  */
  Bag missing;         /* Cache of artifacts that are incomplete */
  Bag available;       /* Cache of artifacts that are complete */
} contentCache;


/*
** Clear the content cache.
*/
void content_clear_cache(void){
  int i;
  for(i=0; i<contentCache.n; i++){
    blob_reset(&contentCache.a[i].content);
  }
  bag_clear(&contentCache.missing);
  bag_clear(&contentCache.available);
  contentCache.n = 0;
}

/*
** Return the srcid associated with rid.  Or return 0 if rid is 
** original content and not a delta.
*/
static int findSrcid(int rid){
  int srcid = db_int(0, "SELECT srcid FROM delta WHERE rid=%d", rid);
  return srcid;
}

/*
** Check to see if content is available for artifact "rid".  Return
** true if it is.  Return false if rid is a phantom or depends on
** a phantom.
*/
int content_is_available(int rid){
  int srcid;
  if( bag_find(&contentCache.missing, rid) ){
    return 0;
  }
  if( bag_find(&contentCache.available, rid) ){
    return 1;
  }
  if( db_int(-1, "SELECT size FROM blob WHERE rid=%d", rid)<0 ){
    bag_insert(&contentCache.missing, rid);
    return 0;
  }
  srcid = findSrcid(rid);
  if( srcid==0 ){
    bag_insert(&contentCache.available, rid);
    return 1;
  }
  if( content_is_available(srcid) ){
    bag_insert(&contentCache.available, rid);
    return 1;
  }else{
    bag_insert(&contentCache.missing, rid);
    return 0;
  }
}

/*
** Mark artifact rid as being available now.  Update the cache to
** show that everything that was formerly unavailable because rid
** was missing is now available.
*/
static void content_mark_available(int rid){
  Bag pending;
  Stmt q;
  if( bag_find(&contentCache.available, rid) ) return;
  bag_init(&pending);
  bag_insert(&pending, rid);
  while( (rid = bag_first(&pending))!=0 ){
    bag_remove(&pending, rid);
    bag_remove(&contentCache.missing, rid);
    bag_insert(&contentCache.available, rid);
    db_prepare(&q, "SELECT rid FROM delta WHERE srcid=%d", rid);
    while( db_step(&q)==SQLITE_ROW ){
      int nx = db_column_int(&q, 0);
      bag_insert(&pending, nx);
    }
    db_finalize(&q);
  }
  bag_clear(&pending);
}

/*
** Extract the content for ID rid and put it into the
** uninitialized blob.  Return 1 on success.  If the record
** is a phantom, zero pBlob and return 0.
*/
int content_get(int rid, Blob *pBlob){
  Stmt q;
  Blob src;
  int srcid;
  int rc = 0;
  int i;
  static Bag inProcess;

  assert( g.repositoryOpen );

  blob_zero(pBlob);

  /* Early out if we know the content is not available */
  if( bag_find(&contentCache.missing, rid) ){
    CONTENT_TRACE(("%*smiss from cache: %d\n",
                    bag_count(&inProcess), "", rid))
    return 0;
  }

  /* Look for the artifact in the cache first */
  for(i=0; i<contentCache.n; i++){
    if( contentCache.a[i].rid==rid ){
      *pBlob = contentCache.a[i].content;
      blob_zero(&contentCache.a[i].content);
      contentCache.n--;
      if( i<contentCache.n ){
        contentCache.a[i] = contentCache.a[contentCache.n];
      }
      CONTENT_TRACE(("%*shit cache: %d\n", 
                    bag_count(&inProcess), "", rid))
      return 1;
    }
  }

  /* See if we need to apply a delta to find this artifact */
  srcid = findSrcid(rid);
  CONTENT_TRACE(("%*ssearching for %d.  Need %d.\n",
                 bag_count(&inProcess), "", rid, srcid))


  if( srcid ){
    /* Yes, a delta is required */
    if( bag_find(&inProcess, srcid) ){
      db_multi_exec(
        "UPDATE blob SET content=NULL, size=-1 WHERE rid=%d;"
        "DELETE FROM delta WHERE rid=%d;"
        "INSERT OR IGNORE INTO phantom VALUES(%d);",
        srcid, srcid, srcid
      );
      blob_zero(pBlob);
      return 0;
    }
    bag_insert(&inProcess, srcid);

    if( content_get(srcid, &src) ){
      db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
      if( db_step(&q)==SQLITE_ROW ){
        Blob delta;
        db_ephemeral_blob(&q, 0, &delta);
        blob_uncompress(&delta, &delta);
        blob_init(pBlob,0,0);
        blob_delta_apply(&src, &delta, pBlob);
        blob_reset(&delta);
        rc = 1;
      }
      db_finalize(&q);

      /* Save the srcid artifact in the cache */
      if( contentCache.n<MX_CACHE_CNT ){
        i = contentCache.n++;
      }else if( ((contentCache.skipCnt++)%EXPELL_INTERVAL)!=0 ){
        i = -1;
      }else{
        int j, best;
        best = contentCache.nextAge+1;
        i = -1;
        for(j=0; j<contentCache.n; j++){
          if( contentCache.a[j].age<best ){
            i = j;
            best = contentCache.a[j].age;
          }
        }
        CONTENT_TRACE(("%*sexpell %d from cache\n",
                       bag_count(&inProcess), "", contentCache.a[i].rid))
        blob_reset(&contentCache.a[i].content);
      }
      if( i>=0 ){
        contentCache.a[i].content = src;
        contentCache.a[i].age = contentCache.nextAge++;
        contentCache.a[i].rid = srcid;
        CONTENT_TRACE(("%*sadd %d to cache\n",
                       bag_count(&inProcess), "", srcid))
      }else{
        blob_reset(&src);
      }
    }
    bag_remove(&inProcess, srcid);
  }else{
    /* No delta required.  Read content directly from the database */
    db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
    if( db_step(&q)==SQLITE_ROW ){
      db_ephemeral_blob(&q, 0, pBlob);
      blob_uncompress(pBlob, pBlob);
      rc = 1;
    }
    db_finalize(&q);
  }
  if( rc==0 ){
    bag_insert(&contentCache.missing, rid);
  }else{
    bag_insert(&contentCache.available, rid);
  }
  return rc;
}

/*
** Get the contents of a file within a given baseline.
*/
int content_get_historical_file(
  const char *revision,    /* Name of the baseline containing the file */
  const char *file,        /* Name of the file */
  Blob *content            /* Write file content here */
){
  Blob mfile;
  Manifest m;
  int i, rid=0;
  
  rid = name_to_rid(revision);
  content_get(rid, &mfile);
  
................................................................................
int content_put(Blob *pBlob, const char *zUuid, int srcId){
  int size;
  int rid;
  Stmt s1;
  Blob cmpr;
  Blob hash;
  int markAsUnclustered = 0;
  int isDephantomize = 0;
  
  assert( g.repositoryOpen );
  if( pBlob && srcId==0 ){
    sha1sum_blob(pBlob, &hash);
  }else{
    blob_init(&hash, zUuid, -1);
  }
................................................................................
      "UPDATE blob SET rcvid=%d, size=%d, content=:data WHERE rid=%d",
       g.rcvid, size, rid
    );
    blob_compress(pBlob, &cmpr);
    db_bind_blob(&s1, ":data", &cmpr);
    db_exec(&s1);
    db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid);
    if( srcId==0 || content_is_available(srcId) ){
      isDephantomize = 1;
      content_mark_available(rid);
    }
  }else{
    /* We are creating a new entry */
    db_prepare(&s1,
      "INSERT INTO blob(rcvid,size,uuid,content)"
      "VALUES(%d,%d,'%b',:data)",
       g.rcvid, size, &hash
................................................................................
  }

  /* If the srcId is specified, then the data we just added is
  ** really a delta.  Record this fact in the delta table.
  */
  if( srcId ){
    db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId);
  }
  if( !isDephantomize && bag_find(&contentCache.missing, rid) && 
      (srcId==0 || content_is_available(srcId)) ){
    content_mark_available(rid);
  }
  if( isDephantomize ){
    after_dephantomize(rid, 0);
  }
  
  /* Add the element to the unclustered table if has never been
  ** previously seen.
  */
  if( markAsUnclustered ){
    db_multi_exec("INSERT OR IGNORE INTO unclustered VALUES(%d)", rid);

Changes to src/delta.c.

194
195
196
197
198
199
200
201
202
203
204








205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
  for(i=1, x=64; v>=x; i++, x <<= 6){}
  return i;
}

/*
** Compute a 32-bit checksum on the N-byte buffer.  Return the result.
*/
static unsigned int checksum(const char *zIn, int N){
  const unsigned char *z = (const unsigned char*)zIn;
  unsigned int sum = 0;
  while( N>=4 ){








    sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3];
    z += 4;
    N -= 4;
  }
  if( N>0 ){
    unsigned char zBuf[4];
    memset(zBuf, 0, sizeof(zBuf));
    memcpy(zBuf, z, N);
    z = zBuf;
    sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3];
  }
  return sum;
}

/*
** Maximum number of landmarks to set in the source file.
*/







|
|
|
|
>
>
>
>
>
>
>
>




|
|
|
|
|
<







194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221

222
223
224
225
226
227
228
  for(i=1, x=64; v>=x; i++, x <<= 6){}
  return i;
}

/*
** Compute a 32-bit checksum on the N-byte buffer.  Return the result.
*/
static unsigned int checksum(const char *zIn, size_t N){
  const unsigned char *z = (const unsigned char *)zIn;
  unsigned sum = 0;
  while(N >= 16){
    sum += ((unsigned)z[0] + z[4] + z[8] + z[12]) << 24;
    sum += ((unsigned)z[1] + z[5] + z[9] + z[13]) << 16;
    sum += ((unsigned)z[2] + z[6] + z[10]+ z[14]) << 8;
    sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
    z += 16;
    N -= 16;
  }
  while(N >= 4){
    sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3];
    z += 4;
    N -= 4;
  }
  switch(N){
    case 3:   sum += (z[2] << 8);
    case 2:   sum += (z[1] << 16);
    case 1:   sum += (z[0] << 24);
    default:  ;

  }
  return sum;
}

/*
** Maximum number of landmarks to set in the source file.
*/

Changes to src/rebuild.c.

49
50
51
52
53
54
55













































































56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74


75
76
77
78
79
80
81
..
89
90
91
92
93
94
95

96
97
98
99

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115

116
117
118
119
120
121
122
...
139
140
141
142
143
144
145

146
147
148
149
150
151
152
153
154
@    rn integer primary key,  -- Report number
@    owner text,              -- Owner of this report format (not used)
@    title text,              -- Title of this report
@    cols text,               -- A color-key specification
@    sqlcode text             -- An SQL SELECT statement for this report
@ );
;














































































/*
** Core function to rebuild the infomration in the derived tables of a
** fossil repository from the blobs. This function is shared between
** 'rebuild_database' ('rebuild') and 'reconstruct_cmd'
** ('reconstruct'), both of which have to regenerate this information
** from scratch.
**
** If the randomize parameter is true, then the BLOBs are deliberately
** extracted in a random order.  This feature is used to test the
** ability of fossil to accept records in any order and still
** construct a sane repository.
*/
int rebuild_db(int randomize, int ttyOutput){
  Stmt s;
  int errCnt = 0;
  char *zTable;
  int cnt = 0;



  db_multi_exec(zSchemaUpdates);
  for(;;){
    zTable = db_text(0,
       "SELECT name FROM sqlite_master"
       " WHERE type='table'"
       " AND name NOT IN ('blob','delta','rcvfrom','user','config','shun')");
    if( zTable==0 ) break;
................................................................................
  db_multi_exec(
     "DELETE FROM unclustered"
     " WHERE rid IN (SELECT rid FROM shun JOIN blob USING(uuid))"
  );
  db_multi_exec(
    "DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')"
  );

  db_prepare(&s,
     "SELECT rid, size FROM blob %s"
     " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)",
     randomize ? "ORDER BY random()" : ""

  );
  while( db_step(&s)==SQLITE_ROW ){
    int rid = db_column_int(&s, 0);
    int size = db_column_int(&s, 1);
    if( size>=0 ){
      Blob content;
      if( ttyOutput ){
        cnt++;
        printf("%d...\r", cnt);
        fflush(stdout);
      }
      content_get(rid, &content);
      manifest_crosslink(rid, &content);
      blob_reset(&content);
    }else{
      db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid);

    }
  }
  db_finalize(&s);
  if( ttyOutput ){
    printf("\n");
  }
  return errCnt;
................................................................................
  forceFlag = find_option("force","f",0)!=0;
  randomizeFlag = find_option("randomize", 0, 0)!=0;
  if( g.argc!=3 ){
    usage("REPOSITORY-FILENAME");
  }
  db_open_repository(g.argv[2]);
  db_begin_transaction();

  errCnt = rebuild_db(randomizeFlag, 1);
  if( errCnt && !forceFlag ){
    printf("%d errors. Rolling back changes. Use --force to force a commit.\n",
            errCnt);
    db_end_transaction(1);
  }else{
    db_end_transaction(0);
  }
}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













|



<

>
>







 







>

|
|
<
>






<
<
<
<
<

|
<


>







 







>









49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149

150
151
152
153
154
155
156
157
158
159
...
167
168
169
170
171
172
173
174
175
176
177

178
179
180
181
182
183
184





185
186

187
188
189
190
191
192
193
194
195
196
...
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
@    rn integer primary key,  -- Report number
@    owner text,              -- Owner of this report format (not used)
@    title text,              -- Title of this report
@    cols text,               -- A color-key specification
@    sqlcode text             -- An SQL SELECT statement for this report
@ );
;

/*
** Variables used for progress information
*/
static int totalSize;       /* Total number of artifacts to process */
static int processCnt;      /* Number processed so far */
static int ttyOutput;       /* Do progress output */

/*
** Called after each artifact is processed
*/
static void rebuild_step_done(void){
  if( ttyOutput ){
    processCnt++;
    printf("%d (%d%%)...\r", processCnt, (processCnt*100/totalSize));
    fflush(stdout);
  }
}

/*
** Rebuild cross-referencing information for the artifact
** rid with content pBase and all of its descendents.  This
** routine clears the content buffer before returning.
*/
static void rebuild_step(int rid, Blob *pBase){
  Stmt q1;
  Bag children;
  Blob copy;
  Blob *pUse;
  int nChild, i, cid;

  /* Find all children of artifact rid */
  db_prepare(&q1, "SELECT rid FROM delta WHERE srcid=%d", rid);
  bag_init(&children);
  while( db_step(&q1)==SQLITE_ROW ){
    bag_insert(&children, db_column_int(&q1, 0));
  }
  nChild = bag_count(&children);
  db_finalize(&q1);

  /* Crosslink the artifact */
  if( nChild==0 ){
    pUse = pBase;
  }else{
    blob_copy(&copy, pBase);
    pUse = &copy;
  }
  manifest_crosslink(rid, pUse);
  blob_reset(pUse);

  /* Call all children recursively */
  for(cid=bag_first(&children), i=1; cid; cid=bag_next(&children, cid), i++){
    Stmt q2;
    int sz;
    if( nChild==i ){
      pUse = pBase;
    }else{
      blob_copy(&copy, pBase);
      pUse = &copy;
    }
    db_prepare(&q2, "SELECT content, size FROM blob WHERE rid=%d", cid);
    if( db_step(&q2)==SQLITE_ROW && (sz = db_column_int(&q2,1))>=0 ){
      Blob delta;
      db_ephemeral_blob(&q2, 0, &delta);
      blob_uncompress(&delta, &delta);
      blob_delta_apply(pUse, &delta, pUse);
      blob_reset(&delta);
      db_finalize(&q2);
      rebuild_step(cid, pUse);
    }else{
      db_finalize(&q2);
      blob_reset(pUse);
    }
  }
  bag_clear(&children);
  rebuild_step_done();
}

/*
** Core function to rebuild the infomration in the derived tables of a
** fossil repository from the blobs. This function is shared between
** 'rebuild_database' ('rebuild') and 'reconstruct_cmd'
** ('reconstruct'), both of which have to regenerate this information
** from scratch.
**
** If the randomize parameter is true, then the BLOBs are deliberately
** extracted in a random order.  This feature is used to test the
** ability of fossil to accept records in any order and still
** construct a sane repository.
*/
int rebuild_db(int randomize, int doOut){
  Stmt s;
  int errCnt = 0;
  char *zTable;


  ttyOutput = doOut;
  processCnt = 0;
  db_multi_exec(zSchemaUpdates);
  for(;;){
    zTable = db_text(0,
       "SELECT name FROM sqlite_master"
       " WHERE type='table'"
       " AND name NOT IN ('blob','delta','rcvfrom','user','config','shun')");
    if( zTable==0 ) break;
................................................................................
  db_multi_exec(
     "DELETE FROM unclustered"
     " WHERE rid IN (SELECT rid FROM shun JOIN blob USING(uuid))"
  );
  db_multi_exec(
    "DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')"
  );
  totalSize = db_int(0, "SELECT count(*) FROM blob");
  db_prepare(&s,
     "SELECT rid, size FROM blob"
     " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)"

     "   AND NOT EXISTS(SELECT 1 FROM delta WHERE rid=blob.rid)"
  );
  while( db_step(&s)==SQLITE_ROW ){
    int rid = db_column_int(&s, 0);
    int size = db_column_int(&s, 1);
    if( size>=0 ){
      Blob content;





      content_get(rid, &content);
      rebuild_step(rid, &content);

    }else{
      db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid);
      rebuild_step_done();
    }
  }
  db_finalize(&s);
  if( ttyOutput ){
    printf("\n");
  }
  return errCnt;
................................................................................
  forceFlag = find_option("force","f",0)!=0;
  randomizeFlag = find_option("randomize", 0, 0)!=0;
  if( g.argc!=3 ){
    usage("REPOSITORY-FILENAME");
  }
  db_open_repository(g.argv[2]);
  db_begin_transaction();
  ttyOutput = 1;
  errCnt = rebuild_db(randomizeFlag, 1);
  if( errCnt && !forceFlag ){
    printf("%d errors. Rolling back changes. Use --force to force a commit.\n",
            errCnt);
    db_end_transaction(1);
  }else{
    db_end_transaction(0);
  }
}

Changes to src/verify.c.

71
72
73
74
75
76
77

78
79
80
81
82
83
84
static int inFinalVerify = 0;

/*
** This routine is called just prior to each commit operation.  
*/
static int verify_at_commit(void){
  int rid;

  inFinalVerify = 1;
  rid = bag_first(&toVerify);
  while( rid>0 ){
    verify_rid(rid);
    rid = bag_next(&toVerify, rid);
  }
  bag_clear(&toVerify);







>







71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
static int inFinalVerify = 0;

/*
** This routine is called just prior to each commit operation.  
*/
static int verify_at_commit(void){
  int rid;
  content_clear_cache();
  inFinalVerify = 1;
  rid = bag_first(&toVerify);
  while( rid>0 ){
    verify_rid(rid);
    rid = bag_next(&toVerify, rid);
  }
  bag_clear(&toVerify);