Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Work toward making fossil work better on large repositories. This version implements a cache in the content manager. It is not clear yet if this is necessarily a good idea - this check-in might end up on an abandoned branch at some point. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
61ddd63b72f1436c9ad18b6bba9a7542 |
User & Date: | drh 2008-03-06 22:58:48.000 |
Context
2008-03-08
| ||
13:49 | Add the "Compression Ratio" line to the "stat" page. ... (check-in: 9aaad3e7 user: drh tags: trunk) | |
2008-03-06
| ||
22:58 | Work toward making fossil work better on large repositories. This version implements a cache in the content manager. It is not clear yet if this is necessarily a good idea - this check-in might end up on an abandoned branch at some point. ... (check-in: 61ddd63b user: drh tags: trunk) | |
00:26 | Fix problems on the stat page that appear for larger repositories. ... (check-in: 80dc46d6 user: drh tags: trunk) | |
Changes
Changes to src/blob.c.
︙ | ︙ | |||
204 205 206 207 208 209 210 | } /* ** Copy a blob */ void blob_copy(Blob *pTo, Blob *pFrom){ blob_is_init(pFrom); | < | 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | } /* ** Copy a blob */ void blob_copy(Blob *pTo, Blob *pFrom){ blob_is_init(pFrom); blob_zero(pTo); blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom)); } /* ** Return a pointer to a null-terminated string for a blob. */ |
︙ | ︙ |
Changes to src/content.c.
︙ | ︙ | |||
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | ******************************************************************************* ** ** Procedures store and retrieve records from the repository */ #include "config.h" #include "content.h" #include <assert.h> /* ** Return the srcid associated with rid. Or return 0 if rid is ** original content and not a delta. */ static int findSrcid(int rid){ int srcid = db_int(0, "SELECT srcid FROM delta WHERE rid=%d", rid); return srcid; } /* ** Extract the content for ID rid and put it into the ** uninitialized blob. Return 1 on success. If the record ** is a phantom, zero pBlob and return 0. */ int content_get(int rid, Blob *pBlob){ Stmt q; Blob src; int srcid; int rc = 0; static Bag inProcess; assert( g.repositoryOpen ); srcid = findSrcid(rid); | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | > > > > > > > | | > > > > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 | ******************************************************************************* ** ** Procedures store and retrieve records from the repository */ #include "config.h" #include "content.h" #include <assert.h> /* ** Macros for debugging */ #if 0 # define CONTENT_TRACE(X) printf X; #else # define CONTENT_TRACE(X) #endif /* ** The artifact retrival cache */ #define MX_CACHE_CNT 50 /* Maximum number of positive cache entries */ #define EXPELL_INTERVAL 5 /* How often to expell from a full cache */ static struct { int n; /* Current number of positive cache entries */ int nextAge; /* Age counter for implementing LRU */ int skipCnt; /* Used to limit entries expelled from cache */ struct { /* One instance of this for each cache entry */ int rid; /* Artifact id */ int age; /* Age. Newer is larger */ Blob content; /* Content of the artifact */ } a[MX_CACHE_CNT]; /* The positive cache */ /* ** The missing artifact cache. ** ** Artifacts whose record ID are in missingCache cannot be retrieved ** either because they are phantoms or because they are a delta that ** depends on a phantom. Artifacts whose content we are certain is ** available are in availableCache. If an artifact is in neither cache ** then its current availablity is unknown. */ Bag missing; /* Cache of artifacts that are incomplete */ Bag available; /* Cache of artifacts that are complete */ } contentCache; /* ** Clear the content cache. */ void content_clear_cache(void){ int i; for(i=0; i<contentCache.n; i++){ blob_reset(&contentCache.a[i].content); } bag_clear(&contentCache.missing); bag_clear(&contentCache.available); contentCache.n = 0; } /* ** Return the srcid associated with rid. Or return 0 if rid is ** original content and not a delta. 
*/ static int findSrcid(int rid){ int srcid = db_int(0, "SELECT srcid FROM delta WHERE rid=%d", rid); return srcid; } /* ** Check to see if content is available for artifact "rid". Return ** true if it is. Return false if rid is a phantom or depends on ** a phantom. */ int content_is_available(int rid){ int srcid; if( bag_find(&contentCache.missing, rid) ){ return 0; } if( bag_find(&contentCache.available, rid) ){ return 1; } if( db_int(-1, "SELECT size FROM blob WHERE rid=%d", rid)<0 ){ bag_insert(&contentCache.missing, rid); return 0; } srcid = findSrcid(rid); if( srcid==0 ){ bag_insert(&contentCache.available, rid); return 1; } if( content_is_available(srcid) ){ bag_insert(&contentCache.available, rid); return 1; }else{ bag_insert(&contentCache.missing, rid); return 0; } } /* ** Mark artifact rid as being available now. Update the cache to ** show that everything that was formerly unavailable because rid ** was missing is now available. */ static void content_mark_available(int rid){ Bag pending; Stmt q; if( bag_find(&contentCache.available, rid) ) return; bag_init(&pending); bag_insert(&pending, rid); while( (rid = bag_first(&pending))!=0 ){ bag_remove(&pending, rid); bag_remove(&contentCache.missing, rid); bag_insert(&contentCache.available, rid); db_prepare(&q, "SELECT rid FROM delta WHERE srcid=%d", rid); while( db_step(&q)==SQLITE_ROW ){ int nx = db_column_int(&q, 0); bag_insert(&pending, nx); } db_finalize(&q); } bag_clear(&pending); } /* ** Extract the content for ID rid and put it into the ** uninitialized blob. Return 1 on success. If the record ** is a phantom, zero pBlob and return 0. 
*/ int content_get(int rid, Blob *pBlob){ Stmt q; Blob src; int srcid; int rc = 0; int i; static Bag inProcess; assert( g.repositoryOpen ); blob_zero(pBlob); /* Early out if we know the content is not available */ if( bag_find(&contentCache.missing, rid) ){ CONTENT_TRACE(("%*smiss from cache: %d\n", bag_count(&inProcess), "", rid)) return 0; } /* Look for the artifact in the cache first */ for(i=0; i<contentCache.n; i++){ if( contentCache.a[i].rid==rid ){ *pBlob = contentCache.a[i].content; blob_zero(&contentCache.a[i].content); contentCache.n--; if( i<contentCache.n ){ contentCache.a[i] = contentCache.a[contentCache.n]; } CONTENT_TRACE(("%*shit cache: %d\n", bag_count(&inProcess), "", rid)) return 1; } } /* See if we need to apply a delta to find this artifact */ srcid = findSrcid(rid); CONTENT_TRACE(("%*ssearching for %d. Need %d.\n", bag_count(&inProcess), "", rid, srcid)) if( srcid ){ /* Yes, a delta is required */ if( bag_find(&inProcess, srcid) ){ db_multi_exec( "UPDATE blob SET content=NULL, size=-1 WHERE rid=%d;" "DELETE FROM delta WHERE rid=%d;" "INSERT OR IGNORE INTO phantom VALUES(%d);", srcid, srcid, srcid ); blob_zero(pBlob); return 0; } bag_insert(&inProcess, srcid); if( content_get(srcid, &src) ){ db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid); if( db_step(&q)==SQLITE_ROW ){ Blob delta; db_ephemeral_blob(&q, 0, &delta); blob_uncompress(&delta, &delta); blob_init(pBlob,0,0); blob_delta_apply(&src, &delta, pBlob); blob_reset(&delta); rc = 1; } db_finalize(&q); /* Save the srcid artifact in the cache */ if( contentCache.n<MX_CACHE_CNT ){ i = contentCache.n++; }else if( ((contentCache.skipCnt++)%EXPELL_INTERVAL)!=0 ){ i = -1; }else{ int j, best; best = contentCache.nextAge+1; i = -1; for(j=0; j<contentCache.n; j++){ if( contentCache.a[j].age<best ){ i = j; best = contentCache.a[j].age; } } CONTENT_TRACE(("%*sexpell %d from cache\n", bag_count(&inProcess), "", contentCache.a[i].rid)) blob_reset(&contentCache.a[i].content); } if( 
i>=0 ){ contentCache.a[i].content = src; contentCache.a[i].age = contentCache.nextAge++; contentCache.a[i].rid = srcid; CONTENT_TRACE(("%*sadd %d to cache\n", bag_count(&inProcess), "", srcid)) }else{ blob_reset(&src); } } bag_remove(&inProcess, srcid); }else{ /* No delta required. Read content directly from the database */ db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid); if( db_step(&q)==SQLITE_ROW ){ db_ephemeral_blob(&q, 0, pBlob); blob_uncompress(pBlob, pBlob); rc = 1; } db_finalize(&q); } if( rc==0 ){ bag_insert(&contentCache.missing, rid); }else{ bag_insert(&contentCache.available, rid); } return rc; } /* ** Get the contents of a file within a given baseline. */ int content_get_historical_file( const char *revision, /* Name of the baseline containing the file */ const char *file, /* Name of the file */ Blob *content /* Write file content here */ ){ Blob mfile; Manifest m; int i, rid=0; rid = name_to_rid(revision); content_get(rid, &mfile); |
︙ | ︙ | |||
192 193 194 195 196 197 198 199 200 201 202 203 204 205 | int content_put(Blob *pBlob, const char *zUuid, int srcId){ int size; int rid; Stmt s1; Blob cmpr; Blob hash; int markAsUnclustered = 0; assert( g.repositoryOpen ); if( pBlob && srcId==0 ){ sha1sum_blob(pBlob, &hash); }else{ blob_init(&hash, zUuid, -1); } | > | 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 | int content_put(Blob *pBlob, const char *zUuid, int srcId){ int size; int rid; Stmt s1; Blob cmpr; Blob hash; int markAsUnclustered = 0; int isDephantomize = 0; assert( g.repositoryOpen ); if( pBlob && srcId==0 ){ sha1sum_blob(pBlob, &hash); }else{ blob_init(&hash, zUuid, -1); } |
︙ | ︙ | |||
247 248 249 250 251 252 253 | "UPDATE blob SET rcvid=%d, size=%d, content=:data WHERE rid=%d", g.rcvid, size, rid ); blob_compress(pBlob, &cmpr); db_bind_blob(&s1, ":data", &cmpr); db_exec(&s1); db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid); | | | > | 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 | "UPDATE blob SET rcvid=%d, size=%d, content=:data WHERE rid=%d", g.rcvid, size, rid ); blob_compress(pBlob, &cmpr); db_bind_blob(&s1, ":data", &cmpr); db_exec(&s1); db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid); if( srcId==0 || content_is_available(srcId) ){ isDephantomize = 1; content_mark_available(rid); } }else{ /* We are creating a new entry */ db_prepare(&s1, "INSERT INTO blob(rcvid,size,uuid,content)" "VALUES(%d,%d,'%b',:data)", g.rcvid, size, &hash |
︙ | ︙ | |||
273 274 275 276 277 278 279 280 281 282 283 284 285 286 | } /* If the srcId is specified, then the data we just added is ** really a delta. Record this fact in the delta table. */ if( srcId ){ db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId); } /* Add the element to the unclustered table if has never been ** previously seen. */ if( markAsUnclustered ){ db_multi_exec("INSERT OR IGNORE INTO unclustered VALUES(%d)", rid); | > > > > > > > | 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 | } /* If the srcId is specified, then the data we just added is ** really a delta. Record this fact in the delta table. */ if( srcId ){ db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId); } if( !isDephantomize && bag_find(&contentCache.missing, rid) && (srcId==0 || content_is_available(srcId)) ){ content_mark_available(rid); } if( isDephantomize ){ after_dephantomize(rid, 0); } /* Add the element to the unclustered table if has never been ** previously seen. */ if( markAsUnclustered ){ db_multi_exec("INSERT OR IGNORE INTO unclustered VALUES(%d)", rid); |
︙ | ︙ |
Changes to src/delta.c.
︙ | ︙ | |||
194 195 196 197 198 199 200 | for(i=1, x=64; v>=x; i++, x <<= 6){} return i; } /* ** Compute a 32-bit checksum on the N-byte buffer. Return the result. */ | | | | | > > > > > > > > < < < | | > | > | 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 | for(i=1, x=64; v>=x; i++, x <<= 6){} return i; } /* ** Compute a 32-bit checksum on the N-byte buffer. Return the result. */ static unsigned int checksum(const char *zIn, size_t N){ const unsigned char *z = (const unsigned char *)zIn; unsigned sum = 0; while(N >= 16){ sum += ((unsigned)z[0] + z[4] + z[8] + z[12]) << 24; sum += ((unsigned)z[1] + z[5] + z[9] + z[13]) << 16; sum += ((unsigned)z[2] + z[6] + z[10]+ z[14]) << 8; sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]); z += 16; N -= 16; } while(N >= 4){ sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3]; z += 4; N -= 4; } switch(N){ case 3: sum += (z[2] << 8); case 2: sum += (z[1] << 16); case 1: sum += (z[0] << 24); default: ; } return sum; } /* ** Maximum number of landmarks to set in the source file. */ |
︙ | ︙ |
Changes to src/rebuild.c.
︙ | ︙ | |||
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | @ rn integer primary key, -- Report number @ owner text, -- Owner of this report format (not used) @ title text, -- Title of this report @ cols text, -- A color-key specification @ sqlcode text -- An SQL SELECT statement for this report @ ); ; /* ** Core function to rebuild the infomration in the derived tables of a ** fossil repository from the blobs. This function is shared between ** 'rebuild_database' ('rebuild') and 'reconstruct_cmd' ** ('reconstruct'), both of which have to regenerate this information ** from scratch. ** ** If the randomize parameter is true, then the BLOBs are deliberately ** extracted in a random order. This feature is used to test the ** ability of fossil to accept records in any order and still ** construct a sane repository. */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < > > | 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | @ rn integer primary key, -- Report number @ owner text, -- Owner of this report format (not used) @ title text, -- Title of this report @ cols text, -- A color-key specification @ sqlcode text -- An SQL SELECT statement for this report @ ); ; /* ** Variables used for progress information */ static int totalSize; /* Total number of artifacts to process */ static int processCnt; /* Number processed so far */ static int ttyOutput; /* Do progress output */ /* ** Called after each artifact is processed */ static void rebuild_step_done(void){ if( ttyOutput ){ processCnt++; printf("%d (%d%%)...\r", 
processCnt, (processCnt*100/totalSize)); fflush(stdout); } } /* ** Rebuild cross-referencing information for the artifact ** rid with content pBase and all of its descendents. This ** routine clears the content buffer before returning. */ static void rebuild_step(int rid, Blob *pBase){ Stmt q1; Bag children; Blob copy; Blob *pUse; int nChild, i, cid; /* Find all children of artifact rid */ db_prepare(&q1, "SELECT rid FROM delta WHERE srcid=%d", rid); bag_init(&children); while( db_step(&q1)==SQLITE_ROW ){ bag_insert(&children, db_column_int(&q1, 0)); } nChild = bag_count(&children); db_finalize(&q1); /* Crosslink the artifact */ if( nChild==0 ){ pUse = pBase; }else{ blob_copy(&copy, pBase); pUse = &copy; } manifest_crosslink(rid, pUse); blob_reset(pUse); /* Call all children recursively */ for(cid=bag_first(&children), i=1; cid; cid=bag_next(&children, cid), i++){ Stmt q2; int sz; if( nChild==i ){ pUse = pBase; }else{ blob_copy(&copy, pBase); pUse = &copy; } db_prepare(&q2, "SELECT content, size FROM blob WHERE rid=%d", cid); if( db_step(&q2)==SQLITE_ROW && (sz = db_column_int(&q2,1))>=0 ){ Blob delta; db_ephemeral_blob(&q2, 0, &delta); blob_uncompress(&delta, &delta); blob_delta_apply(pUse, &delta, pUse); blob_reset(&delta); db_finalize(&q2); rebuild_step(cid, pUse); }else{ db_finalize(&q2); blob_reset(pUse); } } bag_clear(&children); rebuild_step_done(); } /* ** Core function to rebuild the infomration in the derived tables of a ** fossil repository from the blobs. This function is shared between ** 'rebuild_database' ('rebuild') and 'reconstruct_cmd' ** ('reconstruct'), both of which have to regenerate this information ** from scratch. ** ** If the randomize parameter is true, then the BLOBs are deliberately ** extracted in a random order. This feature is used to test the ** ability of fossil to accept records in any order and still ** construct a sane repository. 
*/ int rebuild_db(int randomize, int doOut){ Stmt s; int errCnt = 0; char *zTable; ttyOutput = doOut; processCnt = 0; db_multi_exec(zSchemaUpdates); for(;;){ zTable = db_text(0, "SELECT name FROM sqlite_master" " WHERE type='table'" " AND name NOT IN ('blob','delta','rcvfrom','user','config','shun')"); if( zTable==0 ) break; |
︙ | ︙ | |||
89 90 91 92 93 94 95 96 | db_multi_exec( "DELETE FROM unclustered" " WHERE rid IN (SELECT rid FROM shun JOIN blob USING(uuid))" ); db_multi_exec( "DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')" ); db_prepare(&s, | > | | < > < < < < < | < > | 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | db_multi_exec( "DELETE FROM unclustered" " WHERE rid IN (SELECT rid FROM shun JOIN blob USING(uuid))" ); db_multi_exec( "DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')" ); totalSize = db_int(0, "SELECT count(*) FROM blob"); db_prepare(&s, "SELECT rid, size FROM blob" " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)" " AND NOT EXISTS(SELECT 1 FROM delta WHERE rid=blob.rid)" ); while( db_step(&s)==SQLITE_ROW ){ int rid = db_column_int(&s, 0); int size = db_column_int(&s, 1); if( size>=0 ){ Blob content; content_get(rid, &content); rebuild_step(rid, &content); }else{ db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid); rebuild_step_done(); } } db_finalize(&s); if( ttyOutput ){ printf("\n"); } return errCnt; |
︙ | ︙ | |||
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | forceFlag = find_option("force","f",0)!=0; randomizeFlag = find_option("randomize", 0, 0)!=0; if( g.argc!=3 ){ usage("REPOSITORY-FILENAME"); } db_open_repository(g.argv[2]); db_begin_transaction(); errCnt = rebuild_db(randomizeFlag, 1); if( errCnt && !forceFlag ){ printf("%d errors. Rolling back changes. Use --force to force a commit.\n", errCnt); db_end_transaction(1); }else{ db_end_transaction(0); } } | > | 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 | forceFlag = find_option("force","f",0)!=0; randomizeFlag = find_option("randomize", 0, 0)!=0; if( g.argc!=3 ){ usage("REPOSITORY-FILENAME"); } db_open_repository(g.argv[2]); db_begin_transaction(); ttyOutput = 1; errCnt = rebuild_db(randomizeFlag, 1); if( errCnt && !forceFlag ){ printf("%d errors. Rolling back changes. Use --force to force a commit.\n", errCnt); db_end_transaction(1); }else{ db_end_transaction(0); } } |
Changes to src/verify.c.
︙ | ︙ | |||
71 72 73 74 75 76 77 78 79 80 81 82 83 84 | static int inFinalVerify = 0; /* ** This routine is called just prior to each commit operation. */ static int verify_at_commit(void){ int rid; inFinalVerify = 1; rid = bag_first(&toVerify); while( rid>0 ){ verify_rid(rid); rid = bag_next(&toVerify, rid); } bag_clear(&toVerify); | > | 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 | static int inFinalVerify = 0; /* ** This routine is called just prior to each commit operation. */ static int verify_at_commit(void){ int rid; content_clear_cache(); inFinalVerify = 1; rid = bag_first(&toVerify); while( rid>0 ){ verify_rid(rid); rid = bag_next(&toVerify, rid); } bag_clear(&toVerify); |
︙ | ︙ |