/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=2 et sw=2 tw=80: */ /* Copyright (c) 2013 D. Richard Hipp This program is free software; you can redistribute it and/or modify it under the terms of the Simplified BSD License (also known as the "2-Clause License" or "FreeBSD License".) This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. Author contact information: drh@hwaci.com http://www.hwaci.com/drh/ ***************************************************************************** This file houses the code for the fsl_content_xxx() APIS. */ #include #include "fossil-scm/fossil-internal.h" /* Only for debugging */ #include #define MARKER(pfexp) \ do{ printf("MARKER: %s:%d:%s():\t",__FILE__,__LINE__,__func__); \ printf pfexp; \ } while(0) fsl_int_t fsl_content_size( fsl_cx * f, fsl_id_t blobRid ){ fsl_db * dbR = f ? fsl_cx_db_repo(f) : NULL; if(!f) return -3; else if(blobRid<=0) return -4; else if(!dbR) return -5; else{ int rc; fsl_int_t rv = -2; fsl_stmt * q = NULL; rc = fsl_db_prepare_cached(dbR, &q, "SELECT size FROM blob " "WHERE rid=?"); if(!rc){ rc = fsl_stmt_bind_id(q, 1, blobRid); if(!rc){ if(FSL_RC_STEP_ROW==fsl_stmt_step(q)){ rv = (fsl_int_t)fsl_stmt_g_int64(q, 0); } } fsl_stmt_cached_yield(q); } return rv; } } int fsl_content_blob( fsl_cx * f, fsl_id_t blobRid, fsl_buffer * tgt ){ fsl_db * dbR = f ? fsl_cx_db_repo(f) : NULL; if(!f || !tgt) return FSL_RC_MISUSE; else if(blobRid<=0) return FSL_RC_RANGE; else if(!dbR) return FSL_RC_NOT_A_REPO; else{ int rc; fsl_stmt * q = NULL; rc = fsl_db_prepare_cached( dbR, &q, "SELECT content FROM blob " "WHERE rid=? AND size>=0"); if(!rc){ rc = fsl_stmt_bind_id(q, 1, blobRid); if(!rc && (FSL_RC_STEP_ROW==(rc=fsl_stmt_step(q)))){ void const * mem = NULL; fsl_size_t memLen = 0; tgt->used = 0; fsl_stmt_get_blob(q, 0, &mem, &memLen); if(mem && memLen){ rc = fsl_buffer_append(tgt, mem, memLen); if(!rc && fsl_buffer_is_compressed(tgt)){ rc = fsl_buffer_uncompress(tgt, tgt); } } }else if(FSL_RC_STEP_DONE==rc){ rc = FSL_RC_NOT_FOUND; } fsl_stmt_cached_yield(q); } if(rc && !f->error.code && dbR->error.code){ fsl_cx_uplift_db_error(f, dbR); } return rc; } } char fsl_content_is_private(fsl_cx * f, fsl_id_t rid){ fsl_stmt * s1 = NULL; fsl_db * db = fsl_cx_db_repo(f); int rc = db ? fsl_db_prepare_cached(db, &s1, "SELECT 1 FROM private " "WHERE rid=?") : FSL_RC_MISUSE; if(!rc){ rc = fsl_stmt_bind_id(s1, 1, rid); if(!rc) rc = fsl_stmt_step(s1); fsl_stmt_cached_yield(s1); } return rc==FSL_RC_STEP_ROW; } int fsl_content_get( fsl_cx * f, fsl_id_t rid, fsl_buffer * tgt ){ fsl_db * db = f ? fsl_cx_db_repo(f) : NULL; if(!f || !tgt) return FSL_RC_MISUSE; else if(rid<=0){ return fsl_cx_err_set(f, FSL_RC_RANGE, "RID %"FSL_ID_T_PFMT" is out of range.", (fsl_id_t)rid); } else if(!db){ return fsl_cx_err_set(f, FSL_RC_NOT_A_REPO, "Fossil has no repo opened."); } else{ int rc; char gotIt = 0; fsl_id_t nextRid; fsl_acache * ac = &f->cache.arty; tgt->used = 0; if(fsl_id_bag_contains(&f->cache.arty.missing, rid)){ /* Early out if we know the content is not available */ return FSL_RC_NOT_FOUND; } /* Look for the artifact in the cache first */ if( fsl_id_bag_contains(&f->cache.arty.inCache, rid) ){ fsl_size_t i; fsl_acache_line * line; for(i=0; iused; i++){ line = &ac->list[i]; if( line->rid==rid ){ rc = fsl_buffer_copy(&line->content, tgt); line->age = ac->nextAge++; return rc; } } } nextRid = 0; rc = fsl_delta_src_id(f, rid, &nextRid); /* MARKER(("rc=%d, nextRid=%"FSL_ID_T_PFMT"\n", rc, nextRid)); */ if(rc) return rc; if( nextRid == 0 ){ /* This is not a delta, so get its raw content. */ rc = fsl_content_blob(f, rid, tgt); gotIt = 0==rc; }else{ /* Looks like a delta, so let's expand it... */ fsl_int_t n /* number of used entries in 'a' */; fsl_int_t nAlloc = 10 /* number it items allocated in 'a' */; fsl_id_t * a = NULL /* array of rids we expand */; fsl_int_t mx; fsl_buffer delta = fsl_buffer_empty; fsl_buffer next = fsl_buffer_empty /* delta-applied content */ ; assert(nextRid>0); a = fsl_malloc( sizeof(a[0]) * nAlloc ); if(!a) return FSL_RC_OOM; a[0] = rid; a[1] = nextRid; n = 1; while( !fsl_id_bag_contains(&ac->inCache, nextRid) && !fsl_delta_src_id(f, nextRid, &nextRid) && (nextRid>0)){ /* Figure out how big n needs to be... */ ++n; if( n >= nAlloc ){ /* Expand 'a' */ void * remem; if( n > fsl_db_g_int64(db, 0, "SELECT max(rid) FROM blob")){ rc = fsl_cx_err_set(f, FSL_RC_RANGE, "Infinite loop in delta table."); goto end_delta; } nAlloc = nAlloc * 2; remem = fsl_realloc(a, nAlloc*sizeof(a[0])); if(!remem){ rc = FSL_RC_OOM; goto end_delta; } a = (fsl_id_t*)remem; } a[n] = nextRid; } /** Recursively expand deltas to get the content... */ mx = n; rc = fsl_content_get( f, a[n], tgt ); /* MARKER(("Getting content for rid #%"FSL_ID_T_PFMT", rc=%d\n", a[n], rc)); */ --n; for( ; !rc && (n>=0); --n){ rc = fsl_content_blob(f, a[n], &delta); /* MARKER(("Getting/applying delta rid #%"FSL_ID_T_PFMT", rc=%d\n", a[n], rc)); */ if(rc) goto end_delta; if(!delta.used){ assert(!"Is this possible? The v1 tree has a similar " "condition but i naively don't believe it's necessary."); continue; } next = fsl_buffer_empty; rc = fsl_buffer_delta_apply2(tgt, &delta, &next, &f->error); if(rc) goto end_delta; #if 0 /* In my (very simple) tests this cache costs us more than it saves. TODO: re-test this once we can do a 'rebuild', or something more intensive than processing a single manifest's R-card. At that point we can set a f->flags bit to enable or disable this block for per-use-case optimization purposes. We also probably want to cache fsl_deck instances instead of Manifest blobs (fsl_buffer) like fossil(1) does, otherwise this cache really doesn't save us much work/memory. */ /* FIXME??? fsl_content_put_ex() logic uses */ if( (mx-n)%8==0 ){ rc = fsl_acache_insert( ac, a[n+1], tgt ); if(rc){ fsl_buffer_clear(&next); goto end_delta; } assert(!tgt->mem && "Passed to artifact cache."); }else{ fsl_buffer_clear(tgt); } #else if(mx){/*unused var*/} fsl_buffer_clear(tgt); #endif *tgt = next; } end_delta: fsl_buffer_clear(&delta); fsl_free(a); gotIt = 0==rc; } if(!rc){ /* i'm not 100% sure this bit has been ported accurately... */ rc = fsl_id_bag_insert(gotIt ? &f->cache.arty.available : &f->cache.arty.missing, rid); } return rc; } } int fsl_content_get_sym( fsl_cx * f, char const * sym, fsl_buffer * tgt ){ int rc; fsl_db * db = f ? fsl_needs_repo(f) : NULL; fsl_id_t rid = 0; if(!f || !sym || !tgt) return FSL_RC_MISUSE; else if(!db) return FSL_RC_NOT_A_REPO; rc = fsl_sym_to_rid(f, sym, FSL_CATYPE_ANY, &rid); return rc ? rc : fsl_content_get(f, rid, tgt); } /** Mark artifact rid as being available now. Update f's cache to show that everything that was formerly unavailable because rid was missing is now available. Returns 0 on success. f must have an opened repo and rid must be valid. */ static int fsl_content_mark_available(fsl_cx * f, fsl_id_t rid){ fsl_id_bag pending = fsl_id_bag_empty; int rc; fsl_stmt * st = NULL; fsl_db * db = fsl_cx_db_repo(f); assert(f); assert(db); assert(rid>0); if( fsl_id_bag_contains(&f->cache.arty.available, rid) ) return 0; rc = fsl_id_bag_insert(&pending, rid); if(rc) goto end; while( (rid = fsl_id_bag_first(&pending))!=0 ){ fsl_id_bag_remove(&pending, rid); rc = fsl_id_bag_insert(&f->cache.arty.available, rid); if(rc) goto end; fsl_id_bag_remove(&f->cache.arty.missing, rid); if(!st){ rc = fsl_db_prepare_cached(db, &st, "SELECT rid FROM delta " "WHERE srcid=?"); if(rc) goto end; } rc = fsl_stmt_bind_id(st, 1, rid); while( !rc && (FSL_RC_STEP_ROW==fsl_stmt_step(st)) ){ fsl_id_t const nx = fsl_stmt_g_id(st,0); assert(nx>0); rc = fsl_id_bag_insert(&pending, nx); } } end: if(st) fsl_stmt_cached_yield(st); fsl_id_bag_clear(&pending); return rc; } int fsl_content_put_ex( fsl_cx * f, fsl_buffer const * pBlob, fsl_uuid_cstr zUuid, fsl_id_t srcId, fsl_size_t uncompSize, char isPrivate, fsl_id_t * outRid){ fsl_size_t size; fsl_id_t rid; fsl_stmt * s1 = NULL; fsl_buffer cmpr = fsl_buffer_empty; fsl_buffer hash = fsl_buffer_empty; char markAsUnclustered = 0; char markAsUnsent = 1; char isDephantomize = 0; fsl_db * dbR = fsl_cx_db_repo(f); int rc; char inTrans = 0; assert(f); assert(dbR); assert(pBlob); assert(srcId==0 || zUuid!=NULL); assert(!zUuid || fsl_is_uuid(zUuid)); if(!dbR) return FSL_RC_NOT_A_REPO; if(!zUuid){ assert(0==uncompSize); rc = fsl_sha1sum_buffer(pBlob, &hash); }else{ rc = fsl_buffer_append(&hash, zUuid, FSL_UUID_STRLEN); } if(rc) goto end; if(uncompSize){ /* pBlob is assumed to be compressed. */ assert(fsl_buffer_is_compressed(pBlob)); size = uncompSize; }else{ size = pBlob->used; if(srcId>0){ rc = fsl_delta_applied_size(pBlob->mem, pBlob->used, &size); if(rc) goto end; } } rc = fsl_db_transaction_begin(dbR); if(rc) goto end; inTrans = 1; /* Check to see if the entry already exists and if it does whether or not the entry is a phantom. */ rc = fsl_db_prepare_cached(dbR, &s1, "SELECT rid, size FROM blob WHERE uuid=?"); if(rc) goto end; rc = fsl_stmt_bind_text( s1, 1, fsl_buffer_cstr(&hash), (fsl_int_t)hash.used, 0); if(rc) goto end; rc = fsl_stmt_step(s1); switch(rc){ case FSL_RC_STEP_ROW: rid = fsl_stmt_g_id(s1, 0); if( fsl_stmt_g_int64(s1, 1)>=0 ){ /* Either the entry is not a phantom or it is a phantom but we have no data with which to dephantomize it. In either case, there is nothing for us to do other than return the RID. */ /* Reminder: the do-nothing-for-empty-phantom behaviour is arguable (but historical). There is a corner case there involving an empty file. So far, so good, though. After all... all empty files have the same hash. */ fsl_stmt_cached_yield(s1); fsl_db_transaction_end(dbR,0); if(outRid) *outRid = rid; fsl_buffer_clear(&hash); return 0; } break; case FSL_RC_STEP_DONE: /* No entry with the same UUID currently exists */ rid = 0; markAsUnclustered = 1; rc = 0; break; default: goto end; } if(s1){ fsl_stmt_cached_yield(s1); s1 = NULL; } if(rc) goto end; #if 0 /* Requires app-level data. We might need a client hook mechanism or other metadata here. */ /* Construct a received-from ID if we do not already have one */ if( f->cache.rcvid <= 0 ){ /* FIXME: use cached statement. */ rc = fsl_db_exec(dbR, "INSERT INTO rcvfrom(uid, mtime, nonce, ipaddr)" "VALUES(%d, julianday('now'), %Q, %Q)", g.userUid, g.zNonce, g.zIpAddr ); f->cache.rcvid = fsl_db_last_insert_id(dbR); } #endif if( uncompSize ){ cmpr = *pBlob; }else{ rc = fsl_buffer_compress(pBlob, &cmpr); if(rc) goto end; } if( rid>0 ){ #if 0 assert(!"NYI: adding data to phantom. Requires some missing pieces."); rc = fsl_cx_err_set(f, FSL_RC_NYI, "NYI: adding data to phantom. " "Requires missing rcvId pieces."); goto end; #else assert(!"Untested code. It might work, though, if you remove this assert()."); /* We are just adding data to a phantom */ rc = fsl_db_prepare_cached(dbR, &s1, "UPDATE blob SET " "rcvid=?, size=?, content=? " "WHERE rid=?"); if(rc) goto end; rc = fsl_stmt_bind_id(s1, 1, f->cache.rcvId); if(!rc) rc = fsl_stmt_bind_int64(s1, 2, size); if(!rc) rc = fsl_stmt_bind_blob(s1, 3, cmpr.mem, (fsl_int_t)cmpr.used, 0); if(!rc) rc = fsl_stmt_bind_id(s1, 4, rid); if(!rc){ if(FSL_RC_STEP_DONE==fsl_stmt_step(s1)){ /* FIXME: use cached statement. */ rc = fsl_db_exec(dbR, "DELETE FROM phantom " "WHERE rid=%"FSL_ID_T_PFMT, (fsl_id_t)rid); if( !rc && (srcId==0 || 0==fsl_acache_check_available(f, srcId)) ){ isDephantomize = 1; rc = fsl_content_mark_available(f, rid); } } } fsl_stmt_cached_yield(s1); s1 = NULL; #endif }else{ /* We are creating a new entry */ rc = fsl_db_prepare_cached(dbR, &s1, "INSERT INTO blob(rcvid,size,uuid,content) " "VALUES(?,?,?,?)"); if(rc) goto end; rc = fsl_stmt_bind_id(s1, 1, f->cache.rcvId); if(!rc) rc = fsl_stmt_bind_int64(s1, 2, size); if(!rc) rc = fsl_stmt_bind_text(s1, 3, fsl_buffer_cstr(&hash), (fsl_int_t)hash.used, 0); if(!rc) rc = fsl_stmt_bind_blob(s1, 4, cmpr.mem, (fsl_int_t)cmpr.used, 0); if(!rc){ rc = fsl_stmt_step(s1); if(FSL_RC_STEP_DONE==rc){ rc = 0; rid = fsl_db_last_insert_id(dbR); if(!pBlob ){ /* FIXME: use cached statement. */ rc = fsl_db_exec_multi(dbR, "INSERT OR IGNORE INTO phantom " "VALUES(%"FSL_ID_T_PFMT")", (fsl_id_t)rid); markAsUnsent = 0; } if( !rc && (f->cache.markPrivate || isPrivate) ){ /* FIXME: use cached statement. */ rc = fsl_db_exec_multi(dbR, "INSERT INTO private " "VALUES(%"FSL_ID_T_PFMT")", (fsl_id_t)rid); markAsUnclustered = 0; markAsUnsent = 0; } } } fsl_stmt_cached_yield(s1); s1 = NULL; if(rc) goto end; } /* If the srcId is specified, then the data we just added is really a delta. Record this fact in the delta table. */ if( srcId ){ rc = fsl_db_prepare_cached(dbR, &s1, "REPLACE INTO delta(rid,srcid) VALUES(?,?)"); if(!rc){ fsl_stmt_bind_id(s1, 1, rid); fsl_stmt_bind_id(s1, 2, srcId); rc = fsl_stmt_step(s1); if(FSL_RC_STEP_DONE==rc) rc = 0; fsl_stmt_cached_yield(s1); s1 = NULL; } if(rc) goto end; } if( !isDephantomize && fsl_id_bag_contains(&f->cache.arty.missing, rid) && (srcId==0 || (0==fsl_acache_check_available(f,srcId)))){ /* TODO: document what this is for. TODO: figure out what that is. */ rc = fsl_content_mark_available(f, rid); if(rc) goto end; } if( isDephantomize ){ #if 0 /* MISSING */ after_dephantomize(rid, 0); #else assert(!"Missing code: after_dephantomize()"); #endif } /* Add the element to the unclustered table if has never been previously seen. */ if( markAsUnclustered ){ /* FIXME: use a cached statement. */ rc = fsl_db_exec_multi(dbR, "INSERT OR IGNORE INTO unclustered VALUES" "(%"FSL_ID_T_PFMT")", (fsl_id_t)rid); if(rc) goto end; } if( markAsUnsent ){ /* FIXME: use a cached statement. */ rc = fsl_db_exec(dbR, "INSERT OR IGNORE INTO unsent " "VALUES(%"FSL_ID_T_PFMT")", (fsl_id_t)rid); if(rc) goto end; } rc = fsl_repo_verify_before_commit(f, rid); if(rc) goto end /* FSL_RC_OOM is basically the "only possible" failure after this point. */; /* Code after end: relies on the following 2 lines: */ rc = fsl_db_transaction_end(dbR, 0); inTrans = 0; if(!rc){ if(outRid) *outRid = rid; } end: if(inTrans){ assert(0!=rc); fsl_db_transaction_end(dbR,1); } fsl_buffer_clear(&hash); if(!uncompSize){ fsl_buffer_clear(&cmpr); }/* else cmpr.mem (if any) belongs to pBlob */ return rc; } char fsl_acache_expire_oldest(fsl_acache * c){ fsl_int_t i; fsl_int_t mnAge = c->nextAge; fsl_int_t mn = -1; for(i=0; i<(fsl_int_t)c->used; i++){ if( c->list[i].agelist[i].age; mn = i; } } if( mn>=0 ){ fsl_id_bag_remove(&c->inCache, c->list[mn].rid); c->szTotal -= (fsl_int_t)c->list[mn].content.used; fsl_buffer_clear(&c->list[mn].content); --c->used; c->list[mn] = c->list[c->used]; } return (mn>=0) ? 1 : 0; } int fsl_acache_insert(fsl_acache * c, fsl_id_t rid, fsl_buffer *pBlob){ static const fsl_int_t memLimit = 50000000 /* historical value */; static const fsl_size_t countLimit = 500 /* historical value */; fsl_acache_line *p; if( c->used>countLimit || c->szTotal>memLimit ){ fsl_int_t szBefore; do{ szBefore = c->szTotal; fsl_acache_expire_oldest(c); }while( c->szTotal>memLimit && c->szTotalused>=c->capacity ){ fsl_size_t const cap = c->capacity ? (c->capacity*2) : 10; void * remem = fsl_realloc(c->list, cap*sizeof(c->list[0])); if(!remem){ fsl_buffer_clear(pBlob) /* for consistency */; return FSL_RC_OOM; } c->capacity = cap; c->list = (fsl_acache_line*)remem; } p = &c->list[c->used++]; p->rid = rid; p->age = c->nextAge++; c->szTotal += pBlob->used; p->content = *pBlob /* Transfer ownership */; *pBlob = fsl_buffer_empty; return fsl_id_bag_insert(&c->inCache, rid); } void fsl_acache_clear(fsl_acache * c){ #if 0 while(fsl_acache_expire_oldest(c)){} #else fsl_size_t i; for(i=0; iused; i++){ fsl_buffer_clear(&c->list[i].content); } #endif fsl_free(c->list); fsl_id_bag_clear(&c->missing); fsl_id_bag_clear(&c->available); fsl_id_bag_clear(&c->inCache); *c = fsl_acache_empty; } int fsl_acache_check_available(fsl_cx * f, fsl_id_t rid){ fsl_id_t srcid; int depth = 0; /* Limit to recursion depth */ static const int limit = 10000000 /* historical value */; int rc; fsl_acache * c = &f->cache.arty; assert(f); assert(c); assert(rid>0); assert(fsl_cx_db_repo(f)); while( depth++ < limit ){ fsl_int_t cSize = -1; if( fsl_id_bag_contains(&c->missing, rid) ){ return FSL_RC_NOT_FOUND; } else if( fsl_id_bag_contains(&c->available, rid) ){ return 0; } else if( (cSize=fsl_content_size(f, rid)) <0){ rc = fsl_id_bag_insert(&c->missing, rid); return rc ? rc : FSL_RC_NOT_FOUND; } srcid = 0; rc = fsl_delta_src_id(f, rid, &srcid); if(rc) return rc; else if( srcid==0 ){ rc = fsl_id_bag_insert(&c->available, rid); return rc ? rc : 0; } rid = srcid; } assert(!"delta-loop in repository"); return fsl_cx_err_set(f, FSL_RC_CONSISTENCY, "Serious problem: delta-loop in repository"); } int fsl_content_put( fsl_cx * f, fsl_buffer const * pBlob, fsl_id_t * newRid){ #if 1 return fsl_content_put_ex(f, pBlob, NULL, 0, 0, 0, newRid); #else /* EXPERIMENT: if pBlob appears to be compressed, pass the proper uncompressed size value (and required UUID) to put_ex(). Aaarrggg - there's the catch. We cannot know the UUID without decompressing the data. */ fsl_int_t const ucSize = fsl_buffer_is_compressed(pBlob) ? fsl_buffer_uncompressed_size(pBlob) : 0; if(ucSize < 0) return FSL_RC_RANGE; else{ fsl_buffer uuid = fsl_buffer_empty; int rc = fsl_sha1sum_buffer(pBlob, &uuid); if(!rc){ rc = fsl_content_put_ex(f, pBlob, fsl_buffer_cstr(&uuid), 0, (fsl_size_t)ucSize, 0, newRid); } fsl_buffer_clear(&uuid); return rc; } #endif } char fsl_uuid_is_shunned(fsl_cx * f, fsl_uuid_cstr zUuid){ fsl_int32_t i = 0; fsl_db * db = fsl_cx_db_repo(f); if( !db || zUuid==0 || zUuid[0]==0 ) return 0; i = fsl_db_g_int32( db, 0, "SELECT 1 FROM shun WHERE uuid=%Q", zUuid); return 1==i; } int fsl_content_new( fsl_cx * f, fsl_uuid_cstr uuid, char isPrivate, fsl_id_t * newId ){ fsl_id_t rid = 0; int rc; fsl_db * db = fsl_cx_db_repo(f); fsl_stmt * s1 = NULL, * s2 = NULL; if(!f || !uuid) return FSL_RC_MISUSE; else if(!fsl_is_uuid(uuid)) return FSL_RC_RANGE; if(!db) return FSL_RC_NOT_A_REPO; if( fsl_uuid_is_shunned(f, uuid) ){ return fsl_cx_err_set(f, FSL_RC_ACCESS, "UUID is shunned: %s", uuid) /* need new error code? */; } rc = fsl_db_transaction_begin(db); if(rc) return rc; rc = fsl_db_prepare_cached(db, &s1, "INSERT INTO blob(rcvid,size,uuid,content)" "VALUES(0,-1,?,NULL)"); if(rc) goto end; rc = fsl_stmt_bind_text(s1, 1, uuid, FSL_UUID_STRLEN, 0); if(!rc) rc = fsl_stmt_step(s1); fsl_stmt_cached_yield(s1); if(FSL_RC_STEP_DONE!=rc) goto end; else rc = 0; rid = fsl_db_last_insert_id(db); assert(rid>0); rc = fsl_db_prepare_cached(db, &s2, "INSERT INTO phantom VALUES (?)"); if(rc) goto end; rc = fsl_stmt_bind_id(s2, 1, rid); if(!rc) rc = fsl_stmt_step(s2); fsl_stmt_cached_yield(s2); if(FSL_RC_STEP_DONE!=rc) goto end; else rc = 0; if( f->cache.markPrivate || isPrivate ){ /* Should be seldom enough that we don't need to cache this statement. */ rc = fsl_db_exec(db, "INSERT INTO private VALUES(%"FSL_ID_T_PFMT")", (fsl_id_t)rid); }else{ fsl_stmt * s3 = NULL; rc = fsl_db_prepare_cached(db, &s3, "INSERT INTO unclustered VALUES(?)"); if(!rc){ rc = fsl_stmt_bind_id(s3, 1, rid); if(!rc) rc = fsl_stmt_step(s3); fsl_stmt_cached_yield(s3); if(FSL_RC_STEP_DONE!=rc) goto end; else rc = 0; } } if(!rc) rc = fsl_id_bag_insert(&f->cache.arty.missing, rid); end: if(rc){ if(db->error.code && !f->error.code){ fsl_cx_uplift_db_error(f, db); } fsl_db_transaction_rollback(db); } else{ rc = fsl_db_transaction_commit(db); if(!rc && newId) *newId = rid; else if(rc && !f->error.code){ fsl_cx_uplift_db_error(f, db); } } return rc; } int fsl_content_undeltify(fsl_cx * f, fsl_id_t rid){ int rc; fsl_db * db = f ? fsl_cx_db_repo(f) : NULL; fsl_id_t srcid = 0; if(!f) return FSL_RC_MISUSE; else if(!db) return FSL_RC_NOT_A_REPO; else if(rid<=0) return FSL_RC_RANGE; rc = fsl_delta_src_id( f, rid, &srcid ); if(!rc /*??? && (srcid>0)*/){ fsl_buffer x = fsl_buffer_empty; rc = fsl_content_get(f, rid, &x); if( !rc && x.used ){ fsl_stmt s = fsl_stmt_empty; rc = fsl_db_transaction_begin(db) /* Reminder: the original impl does not do this in a transaction, _possibly_ because it's only done from places where a transaction is active (that's unconfirmed). If problems arise, try removing this transaction begin/end. */; if(!rc){ /* TODO: use cached statements */ rc = fsl_db_prepare(db, &s, "UPDATE blob SET content=?," " size=%"FSL_SIZE_T_PFMT " WHERE rid=%"FSL_ID_T_PFMT, (fsl_size_t)x.used, (fsl_id_t)rid); if(!rc){ rc = fsl_buffer_compress(&x, &x); if(!rc){ rc = fsl_stmt_bind_blob(&s, 1, x.mem, (fsl_int_t)x.used, 0); if(!rc){ rc = fsl_stmt_step(&s); if(FSL_RC_STEP_DONE==rc) rc = 0; } } fsl_stmt_finalize(&s); } if(!rc) rc = fsl_db_exec(db, "DELETE FROM delta " "WHERE rid=%"FSL_ID_T_PFMT, (fsl_id_t)rid); if(rc) fsl_db_transaction_rollback(db); else rc = fsl_db_transaction_commit(db); } } fsl_buffer_clear(&x); #if 0 /* v1 does not do this, but that seems like an inconsistency. On that topic Richard says: "When you undelta an artifact, however, it is then stored as plain text. (Actually, as zlib compressed plain text.) There is no possibility of delta loops or bugs in the delta encoder or missing source artifacts. And so there is much less of a chance of losing content. Hence, I didn't see the need to verify the content of artifacts that are undelta-ed." Potential TODO: f->flags FSL_CX_F_PEDANTIC_VERIFICATION, which enables the R-card and this check, and any similarly superfluous ones. */ if(!rc) fsl_repo_verify_before_commit(f, rid); #endif } return rc; } int fsl_content_deltify(fsl_cx * f, fsl_id_t rid, fsl_id_t srcid, char force){ fsl_id_t s; fsl_buffer data = fsl_buffer_empty; fsl_buffer src = fsl_buffer_empty; fsl_buffer delta = fsl_buffer_empty; fsl_db * db = f ? fsl_cx_db_repo(f) : NULL; int rc = 0; enum { MinSizeThreshold = 50 }; if(!f) return FSL_RC_MISUSE; else if(rid<=0 || srcid<=0) return FSL_RC_RANGE; else if(!db) return FSL_RC_NOT_A_REPO; else if( srcid==rid ) return 0; if(!force){ fsl_id_t tmpRid = 0; rc = fsl_delta_src_id(f, rid, &tmpRid); if(tmpRid>0){ /* We already have a delta, it seems. Nothing left to do :-D. Should we return FSL_RC_ALREADY_EXISTS here? */ return 0; } else if(rc) return rc; } if( fsl_content_is_private(f, srcid) && !fsl_content_is_private(f, rid) ){ /* See API doc comments about crossing the private/public boundaries. Do we want to report okay here or FSL_RC_ACCESS? Not yet sure how this routine is used. Since delitifying is an internal optimization/implementation detail, it seems best to return 0 for this case. */ /* MARKER(("REMINDER: arguable return of 0 here.\n")); */ return 0; } /** Undeltify srcid if needed... */ s = srcid; while( (0==(rc=fsl_delta_src_id(f, s, &s))) && (s>0) ){ if( s==rid ){ rc = fsl_content_undeltify(f, srcid); break; } } if(rc) return rc; /* As of here, don't return on error. Use (goto end) instead, or be really careful, b/c buffers might need cleaning. */ rc = fsl_content_get(f, srcid, &src); if(rc || (src.used < MinSizeThreshold) /* See API doc comments about minimum size to delta/undelta. */ ) goto end; rc = fsl_content_get(f, rid, &data); if(rc || (data.used < MinSizeThreshold)) goto end; rc = fsl_buffer_delta_create(&src, &data, &delta); if( !rc && (delta.used <= (data.used * 3 / 4 /* 75% */))){ fsl_stmt * s1 = NULL; fsl_stmt * s2 = NULL; rc = fsl_buffer_compress(&delta, &delta); if(rc) goto end; rc = fsl_db_prepare_cached(db, &s1, "UPDATE blob SET content=? " "WHERE rid=?"); if(!rc){ fsl_stmt_bind_id(s1, 2, rid); rc = fsl_stmt_bind_blob(s1, 1, delta.mem, delta.used, 0); if(!rc){ rc = fsl_db_prepare_cached(db, &s2, "REPLACE INTO delta(rid,srcid) " "VALUES(?,?)"); if(!rc){ fsl_stmt_bind_id(s2, 1, rid); fsl_stmt_bind_id(s2, 2, srcid); rc = fsl_db_transaction_begin(db); if(!rc){ rc = fsl_stmt_step(s1); if(FSL_RC_STEP_DONE==rc){ rc = fsl_stmt_step(s2); if(FSL_RC_STEP_DONE==rc) rc = 0; } if(!rc) rc = fsl_db_transaction_end(db, 0); else fsl_db_transaction_end(db, 1) /* keep rc intact */; } } } } fsl_stmt_cached_yield(s1); fsl_stmt_cached_yield(s2); if(!rc) fsl_repo_verify_before_commit(f, rid); } end: if(rc && db->error.code && !f->error.code){ fsl_cx_uplift_db_error(f,db); } fsl_buffer_clear(&src); fsl_buffer_clear(&data); fsl_buffer_clear(&delta); return rc; } /** Removes all entries from the repo's blob table which are listed in the shun table. */ int fsl_repo_shun_artifacts(fsl_cx * f){ fsl_stmt q = fsl_stmt_empty; int rc; fsl_db * db = f ? fsl_cx_db_repo(f) : NULL; if(!f) return FSL_RC_MISUSE; else if(!db) return FSL_RC_NOT_A_REPO; rc = fsl_db_transaction_begin(db); if(rc) return rc; rc = fsl_db_exec_multi(db, "CREATE TEMP TABLE IF NOT EXISTS " "toshun(rid INTEGER PRIMARY KEY);" "INSERT INTO toshun SELECT rid FROM blob, shun " "WHERE blob.uuid=shun.uuid;" ); if(rc) goto end; /* Ensure that deltas generated from the to-be-shunned data are unpacked into non-delta form... */ rc = fsl_db_prepare(db, &q, "SELECT rid FROM delta WHERE srcid IN toshun" ); if(rc) goto end; while( !rc && (FSL_RC_STEP_ROW==fsl_stmt_step(&q)) ){ fsl_id_t const srcid = fsl_stmt_g_id(&q, 0); rc = fsl_content_undeltify(f, srcid); } fsl_stmt_finalize(&q); if(!rc){ rc = fsl_db_exec_multi(db, "DELETE FROM delta WHERE rid IN toshun;" "DELETE FROM blob WHERE rid IN toshun;" "DROP TABLE toshun;" "DELETE FROM private " "WHERE NOT EXISTS " "(SELECT 1 FROM blob WHERE rid=private.rid);" ); } end: if(!rc) rc = fsl_db_transaction_commit(db); else fsl_db_transaction_rollback(db); if(rc && db->error.code && !f->error.code){ rc = fsl_cx_uplift_db_error(f, db); } return rc; } int fsl_content_make_public(fsl_cx * f, fsl_id_t rid){ fsl_stmt * q = NULL; int rc; fsl_db * db = f ? fsl_cx_db_repo(f) : NULL; if(!f) return FSL_RC_MISUSE; else if(!db) return FSL_RC_NOT_A_REPO; rc = fsl_db_prepare_cached(db, &q, "DELETE FROM private WHERE rid=?"); if(!rc){ rc = fsl_stmt_bind_id(q, 1, rid); if(!rc){ rc = fsl_stmt_step(q); if(FSL_RC_STEP_DONE==rc) rc = 0; } fsl_stmt_cached_yield(q); } if(rc && db->error.code && !f->error.code){ fsl_cx_uplift_db_error(f, db); } return rc; } /** Load the record ID rid and up to N-1 closest ancestors into the "ok" table. */ static int fsl_compute_ancestors( fsl_db * db, fsl_id_t rid, int N, char directOnly ){ fsl_stmt * st = NULL; int rc = fsl_db_prepare_cached(db, &st, "WITH RECURSIVE " " ancestor(rid, mtime) AS (" " SELECT ?, mtime " " FROM event WHERE objid=? " " UNION " " SELECT plink.pid, event.mtime" " FROM ancestor, plink, event" " WHERE plink.cid=ancestor.rid" " AND event.objid=plink.pid %s" " ORDER BY mtime DESC LIMIT ?" " )" "INSERT INTO fsl_computed_ancestors" " SELECT rid FROM ancestor;", directOnly ? "AND plink.isPrim" : "" ); if(!rc){ fsl_stmt_bind_id(st, 1, rid); fsl_stmt_bind_id(st, 2, rid); fsl_stmt_bind_int32(st, 3, (fsl_int32_t)N); rc = fsl_stmt_step(st); if(FSL_RC_STEP_DONE==rc){ rc = 0; } fsl_stmt_cached_yield(st); } return rc; } int fsl_mtime_of_F_card(fsl_cx * f, fsl_id_t vid, fsl_card_F const * fc, fsl_time_t *pMTime){ if(!f || !fc) return FSL_RC_MISUSE; else if(vid<=0) return FSL_RC_RANGE; else if(!fc->uuid){ if(pMTime) *pMTime = 0; return 0; }else{ fsl_id_t fid = fsl_uuid_to_rid(f, fc->uuid); if(fid<=0){ assert(f->error.code); return f->error.code; }else{ return fsl_mtime_of_manifest_file(f, vid, fid, pMTime); } } } int fsl_mtime_of_manifest_file(fsl_cx * f, fsl_id_t vid, fsl_id_t fid, fsl_time_t *pMTime){ fsl_db * db = fsl_needs_repo(f); fsl_stmt * q = NULL; int rc; if(!db) return FSL_RC_NOT_A_REPO; if(fid<=0){ /* Only fetch the checkin time... */ fsl_int64_t i = -1; rc = fsl_db_get_int64(db, &i, "SELECT (mtime-2440587.5)*86400 " "FROM event WHERE objid=%"FSL_ID_T_PFMT " AND type='ci'", (fsl_id_t)vid); if(!rc){ if(i<0) rc = FSL_RC_NOT_FOUND; else if(pMTime) *pMTime = (fsl_time_t)i; } return rc; } if( f->cache.mtimeManifest != vid ){ /* Computing (and keeping) ancestors is relatively costly, so we keep only the copy associated with f->cache.mtimeManifest around. For the general case, we will be feeding this function files from the same manifest. */ f->cache.mtimeManifest = vid; rc = fsl_db_exec_multi(db,"DROP TABLE IF EXISTS temp.fsl_computed_ancestors;" "CREATE TEMP TABLE fsl_computed_ancestors" "(x INTEGER PRIMARY KEY);"); if(!rc){ rc = fsl_compute_ancestors(db, vid, 100000000, 1); } if(rc){ fsl_cx_uplift_db_error(f, db); return rc; } } rc = fsl_db_prepare_cached(db, &q, "SELECT (max(event.mtime)-2440587.5)*86400 FROM mlink, event" " WHERE mlink.mid=event.objid" " AND mlink.fid=?" " AND +mlink.mid IN fsl_computed_ancestors" ); if(!rc){ fsl_stmt_bind_id(q, 1, fid); rc = fsl_stmt_step(q); if( FSL_RC_STEP_ROW==rc ){ rc = 0; if(pMTime) *pMTime = (fsl_time_t)fsl_stmt_g_int64(q, 0); }else{ assert(rc); if(FSL_RC_STEP_DONE==rc) rc = FSL_RC_NOT_FOUND; } fsl_stmt_cached_yield(q); } return rc; } int fsl_card_F_content( fsl_cx * f, fsl_card_F const * fc, fsl_buffer * dest ){ if(!f || !fc || !dest) return FSL_RC_MISUSE; else if(!fc->uuid){ return fsl_cx_err_set(f, FSL_RC_RANGE, "Cannot fetch content of a deleted file " "because it has no UUID."); } else if(!fsl_needs_repo(f)) return FSL_RC_NOT_A_REPO; else{ fsl_id_t const rid = fsl_uuid_to_rid(f, fc->uuid); if(!rid) return fsl_cx_err_set(f, FSL_RC_NOT_FOUND, "UUID not found: %s", fc->uuid); else if(rid<0){ assert(f->error.code); return f->error.code; }else{ return fsl_content_get(f, rid, dest); } } } /** UNTESTED (but closely derived from known-working code). Expects f to have an opened checkout. Assumes zName is resolvable (via fsl_checkout_filename_check() - see that function for the meaning of the relativeToCwd argument) to a path under the current checkout root. It loads the file's contents and stores them into the blob table. If rid is not NULL, *rid is assigned the blob.rid (possibly new, possilbly re-used!). If uuid is not NULL then *uuid is assigned to the content's UUID. The *uuid bytes are owned by the caller, who must eventually fsl_free() them. If content with the same UUID already exists, it does not get re-imported but rid/uuid will (if not NULL) contain the values of any previous content with the same hash. ACHTUNG: this function DOES NOT CARE whether or not the file is actually part of a checkout or not, nor whether it is actually referenced by any checkins, or such, other than that it must resolve to something under the checkout root (to avoid breaking any internal assumptions in fossil about filenames). It will add new repo.filename entries as needed for this function. Thus is can be used to import "shadow files" either not known about by fossil or not _yet_ known about by fossil. Note, however, that fossil(1)'s rebuild command "might" (or might not) remove any such orphaned (in its eyes) blobs/filenames. If parentRid is >0 then it must refer to the previous version of zName's content. The parent version gets deltified vs the new one, but deltification is a suggestion which the library will ignore if (e.g.) the parent content is already a delta of something else. This function does its DB-side work in a transaction, so, e.g. if saving succeeds but deltification of the parent version fails for some reason, the whole save operation is rolled back. Returns 0 on success. On error rid and uuid are not modified. */ int fsl_import_file( fsl_cx * f, char relativeToCwd, char const * zName, fsl_id_t parentRid, fsl_id_t *rid, fsl_uuid_str * uuid ){ fsl_buffer * canon = f ? &f->scratch : NULL; fsl_buffer * nbuf = f ? &f->fsScratch : NULL; fsl_buffer * fbuf = f ? &f->fileContent : NULL; char const * fn; int rc; fsl_id_t fnid = 0; fsl_id_t rcRid = 0; fsl_db * db = f ? fsl_needs_repo(f) : NULL; char inTrans = 0; if(!f || !zName || !*zName) return FSL_RC_MISUSE; else if(!f->ckout.dir) return FSL_RC_NOT_A_CHECKOUT; else if(!db) return FSL_RC_NOT_A_REPO; assert(!fbuf->used && "Misuse of f->fileContent"); assert(!canon->used && "Misuse of f->scratch"); assert(!nbuf->used && "Misuse of f->fsScratch"); assert(f->ckout.dir); /* Normalize the name... i often regret having fsl_checkout_filename_check() return checkout-relative paths. */ rc = fsl_checkout_filename_check(f, relativeToCwd, zName, canon); if(rc) goto end; /* Find or create a repo.filename entry... */ fn = fsl_buffer_cstr(canon); rc = fsl_db_transaction_begin(db); if(rc) goto end; inTrans = 1; rc = fsl_repo_filename_fnid2(f, fn, &fnid, 1); if(rc) goto end; /* Import the file... */ assert(fnid>0); rc = fsl_buffer_appendf(nbuf, "%s%s", f->ckout.dir, fn); if(rc) goto end; fn = fsl_buffer_cstr(nbuf); rc = fsl_buffer_fill_from_filename( fbuf, fn ); if(rc){ fsl_cx_err_set(f, rc, "Error %s importing file: %s", fsl_rc_cstr(rc), fn); goto end; } /* Free up these internal buffers for the following routines... */ fsl_buffer_reset(nbuf); fsl_buffer_reset(canon); fn = NULL; rc = fsl_content_put( f, fbuf, &rcRid ); if(!rc){ assert(rcRid > 0); if(parentRid>0){ /* Make parent version a delta of this one, if possible... */ rc = fsl_content_deltify(f, parentRid, rcRid, 0); } if(!rc){ if(rid) *rid = rcRid; if(uuid){ fsl_cx_err_reset(f); *uuid = fsl_rid_to_uuid(f, rcRid); if(!*uuid) rc = (f->error.code ? f->error.code : FSL_RC_OOM); } } } if(!rc){ assert(inTrans); inTrans = 0; rc = fsl_db_transaction_commit(db); } end: fsl_cx_yield_file_buffer(f); assert(0==fbuf->used); fsl_buffer_reset(nbuf); fsl_buffer_reset(canon); if(rc && inTrans) fsl_db_transaction_rollback(db); return rc; } #undef MARKER