/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/*
  Copyright 2013-2021 The Libfossil Authors, see LICENSES/BSD-2-Clause.txt

  SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  SPDX-FileCopyrightText: 2021 The Libfossil Authors
  SPDX-ArtifactOfProjectName: Libfossil
  SPDX-FileType: Code

  Heavily indebted to the Fossil SCM project (https://fossil-scm.org).
*/
#include "libfossil.h"
/*
  NOTE(review): this copy of the file had lost the names of the five
  system headers below. string.h and stddef.h follow from their
  trailing comments; assert.h, errno.h and zlib.h are inferred from
  this file's use of assert(), errno and compress()/deflate() --
  confirm against the canonical source.
*/
#include <assert.h>
#include <string.h> /* strlen() */
#include <stddef.h> /* NULL on linux */
#include <errno.h>
#include <zlib.h>

/**
   Debugging aid: prints the current file, line and function followed
   by the printf-style argument list pfexp, which must be wrapped in
   its own parentheses: MARKER(("x=%d\n", x)).
*/
#define MARKER(pfexp)                                               \
  do{ printf("MARKER: %s:%d:%s():\t",__FILE__,__LINE__,__func__);   \
    printf pfexp;                                                   \
  } while(0)

/**
   Evaluates to true if buffer b points to memory but has a capacity
   of 0. That is the state fsl_buffer_external() leaves a buffer in,
   i.e. the memory is owned elsewhere. The argument is parenthesized
   so that arbitrary pointer expressions may be passed.
*/
#define buffer_is_external(b) ((b)->mem && 0==(b)->capacity)

/**
   Materializes external buffer b by allocating b->used+extra+1 bytes,
   copying b->used bytes from b->mem to the new block,
   NUL-terminating the block, and replacing b->mem with the new
   block. Returns 0 on success, else FSL_RC_OOM.

   Asserts that b is an external buffer.
*/
static int fsl__buffer_materialize( fsl_buffer * const b, fsl_size_t extra ){
  assert(buffer_is_external(b));
  fsl_size_t const n = b->used + extra + 1;
  unsigned char * x = (unsigned char *)fsl_malloc(n);
  if(!x) return FSL_RC_OOM;
  memcpy(x, b->mem, b->used);
  b->capacity = n;
  x[b->used] = 0;
  b->mem = x;
  return 0;
}

/**
   If b is an external buffer, replaces its memory with an owned,
   NUL-terminated copy. Returns 0 if b is not external or on success,
   else FSL_RC_OOM.
*/
int fsl_buffer_materialize( fsl_buffer * const b ){
  return buffer_is_external(b) ? fsl__buffer_materialize(b, 0) : 0;
}

/**
   Materializes buffer B with N bytes of extra room if it is external,
   else evaluates to 0. Evaluates to non-0 on allocation error.
*/
#define buffer_materialize(B,N) (buffer_is_external(B) ? fsl__buffer_materialize((B),(N)) : 0)

/**
   Points b at the first n bytes of mem without copying them and marks
   b as external (capacity 0). If n is negative, fsl_strlen() is used
   to calculate it. Resets b's cursor. Asserts that b does not
   currently have a non-0 capacity.
*/
void fsl_buffer_external( fsl_buffer * const b, void const * mem, fsl_int_t n ){
  assert(!b->capacity);
  if(n<0) n =(fsl_int_t)fsl_strlen((char const *)mem);
  b->used = n;
  b->cursor = 0;
  b->mem = (unsigned char *)mem;
  b->capacity = 0;
}

/**
   Resets b for re-use and returns b. An external buffer is reset to
   the empty state. Otherwise the cursor is rewound and, if b has any
   capacity, its used size is set to 0 and its first byte to NUL; the
   allocation is retained.
*/
fsl_buffer * fsl_buffer_reuse( fsl_buffer * const b ){
  if(buffer_is_external(b)){
    *b = fsl_buffer_empty;
  }else{
    if(b->capacity){
      assert(b->mem);
      b->mem[0] = 0;
      b->used = 0;
    }
    b->cursor = 0;
  }
  return b;
}

/**
   Frees buf's memory if buf has a non-0 capacity, then resets buf to
   the empty state.
*/
void fsl_buffer_clear( fsl_buffer * const buf ){
  if(buf->capacity) fsl_free(buf->mem);
  *buf = fsl_buffer_empty;
}

/**
   Ensures that buf has a capacity of at least n bytes.

   - If buf is NULL, returns FSL_RC_MISUSE.

   - If n is 0, buf's memory is freed (unless it is external) and buf
     is reset to the empty state.

   - If buf is not external and already has at least n bytes of
     capacity, this is a no-op.

   - Otherwise a block of n bytes is (re)allocated. An external
     buffer's used bytes are copied into the new block and n is first
     raised to buf->used+1 if it was not larger than buf->used. All
     bytes from buf->used onwards are zeroed.

   Returns 0 on success or FSL_RC_OOM on allocation error, in which
   case buf is not modified.
*/
int fsl_buffer_reserve( fsl_buffer * const buf, fsl_size_t n ){
  if( ! buf ) return FSL_RC_MISUSE;
  else if( 0 == n ){
    if(!buffer_is_external(buf)){
      fsl_free(buf->mem);
    }/* else if it has memory, it's owned elsewhere */
    *buf = fsl_buffer_empty;
    return 0;
  }else if( !buffer_is_external(buf) && buf->capacity >= n ){
    assert(buf->mem);
    return 0;
  }else{
    unsigned char * x;
    bool const isExt = buffer_is_external(buf);
    if(isExt){
      /* For external buffers, always keep at least the
         initially-pointed-to size. This adjustment must happen
         before any size sanity check: n<=buf->used is a legal
         request for an external buffer. */
      if(n<=buf->used) n = buf->used + 1;
    }else{
      /* An owned buffer only gets here with capacity < n. */
      assert((buf->used < n) && "Buffer in-use greater than capacity!");
    }
    x = (unsigned char *)fsl_realloc( isExt ? NULL : buf->mem, n );
    if( !x ) return FSL_RC_OOM;
    if(isExt){
      memcpy( x, buf->mem, buf->used );
    }
    /* Zero everything past the used part, in both the owned and the
       external case, so that no uninitialized bytes are exposed and
       the content is NUL-terminated. */
    memset( x + buf->used, 0, n - buf->used );
    buf->mem = x;
    buf->capacity = n;
    return 0;
  }
}

/**
   Sets b's used size to n bytes, allocating n+1 bytes (for a NUL
   terminator) where needed. The branch below handles an external
   buffer: same-size requests are no-ops, a size of 0 points b at a
   static empty string, and any other size copies the retained bytes
   into a newly-allocated, zero-padded block. Returns 0 on success,
   FSL_RC_OOM on allocation error.
*/
int fsl_buffer_resize( fsl_buffer * const b, fsl_size_t n ){
  if(buffer_is_external(b)){
    if(n==b->used) return 0;
    else if(n==0){
      b->capacity = 0;
      fsl_buffer_external(b, "", 0);
      return 0;
    }
    unsigned char * x = (unsigned char *)fsl_malloc( n+1/*NUL*/ );
    if( !x ) return FSL_RC_OOM;
    if(n > b->used){
      /* zero-fill the part which is not copied from the external
         memory, so that growing never exposes uninitialized bytes */
      memset( x + b->used, 0, n - b->used );
    }
    memcpy(x, b->mem, n < b->used ?
n : b->used);
    x[n] = 0;
    b->mem = x;
    b->capacity = n+1;
    b->used = n;
    return 0;
  }else if(n && (b->capacity == n+1)){
    /* Owned buffer which already has exactly n+1 bytes: only the used
       size and the terminator need updating. */
    b->used = n;
    b->mem[n] = 0;
    return 0;
  }else{
    /* Owned (or empty) buffer: grow or shrink the allocation to n+1
       bytes. */
    unsigned char * x = (unsigned char *)fsl_realloc( b->mem, n+1/*NUL*/ );
    if( ! x ) return FSL_RC_OOM;
    if(n > b->capacity){
      /* zero-fill new parts */
      memset( x + b->capacity, 0, n - b->capacity +1/*NUL*/ );
    }
    b->capacity = n + 1 /*NUL*/;
    b->used = n;
    b->mem = x;
    b->mem[b->used] = 0;
    return 0;
  }
}

/*
  NOTE(review): from here up to the tail of fsl_buffer_append() this
  copy of the file has lost text: everything between a '<' which was
  directly followed by an identifier and the next '>' is missing. The
  affected statements are kept exactly as found and are marked
  individually below. They cannot compile as-is and must be restored
  from the canonical source.
*/

/**
   Compares the used bytes of lhs and rhs. NOTE(review): judging by
   the surviving (szL==szR) ? 0 : ... expression this presumably
   returns a memcmp()-style ordering with the shorter buffer sorting
   first on a common prefix -- confirm once the text is restored.
*/
int fsl_buffer_compare(fsl_buffer const * const lhs, fsl_buffer const * const rhs){
  fsl_size_t const szL = lhs->used;
  fsl_size_t const szR = rhs->used;
  /* NOTE(review): text lost inside the next statement. */
  fsl_size_t const sz = (szLmem, rhs->mem, sz);
  if(0 == rc){
    rc = (szL==szR)
      ? 0
      /* NOTE(review): text lost after "((szL". The lost span covered
         the end of fsl_buffer_compare() and the signature and first
         declaration of the following comparison function, whose name
         cannot be recovered from this copy. */
      : ((szLused;
  fsl_size_t const szR = rhs->used;
  fsl_size_t i;
  unsigned char const *buf1;
  unsigned char const *buf2;
  unsigned char rc = 0;
  /* Buffers of differing length, or two empty buffers, report 1
     immediately. */
  if( szL!=szR || szL==0 ) return 1;
  buf1 = lhs->mem;
  buf2 = rhs->mem;
  /* NOTE(review): text lost after "i=0; i". The lost span covered the
     loop body, the end of this function and the signature and first
     declaration of what is presumably fsl_buffer_append() (it is
     called under that name, with (buffer, data, length) arguments,
     elsewhere in this file). */
  for( i=0; iused;
  /* A negative len means "use fsl_strlen() of data". */
  if(len<0) len = (fsl_int_t)fsl_strlen((char const *)data);
  /* An external buffer must be turned into an owned one before it can
     be appended to. */
  if(buffer_materialize(b, (fsl_size_t)len + 1)) return FSL_RC_OOM;
  assert(b->capacity ? !!b->mem : !b->mem);
  assert(b->used <= b->capacity);
  sz += len + 1/*NUL*/;
  /* NOTE(review): text lost after "b->capacity" in the next
     statement. */
  int const rc = b->capacitycapacity >= sz);
    if(len>0) memcpy(b->mem + b->used, data, (size_t)len);
    b->used += len;
    b->mem[b->used] = 0;
  }
  return rc;
}

/**
   Appends printf-style formatted output to b by passing
   fsl_output_f_buffer() and b to fsl_appendfv(). Returns that
   function's result.
*/
int fsl_buffer_appendfv( fsl_buffer * const b, char const * fmt, va_list args){
  return fsl_appendfv( fsl_output_f_buffer, b, fmt, args );
}

/**
   Variadic form of fsl_buffer_appendfv(). Returns FSL_RC_MISUSE if
   b or fmt is NULL, else the result of fsl_buffer_appendfv().
*/
int fsl_buffer_appendf( fsl_buffer * const b, char const * fmt, ... ){
  if(!b || !fmt) return FSL_RC_MISUSE;
  else{
    int rc;
    va_list args;
    va_start(args,fmt);
    rc = fsl_buffer_appendfv( b, fmt, args );
    va_end(args);
    return rc;
  }
}

/**
   Returns b's memory as a const C-string pointer, or NULL if b is
   NULL. The result is also NULL if b has no memory.
*/
char const * fsl_buffer_cstr(fsl_buffer const * const b){
  return b ?
(char const *)b->mem : NULL; } char const * fsl_buffer_cstr2(fsl_buffer const * const b, fsl_size_t * const len){ char const * rc = NULL; if(b){ rc = (char const *)b->mem; if(len) *len = b->used; } return rc; } char * fsl_buffer_str(fsl_buffer const * const b){ return (char *)b->mem; } #if 0 fsl_size_t fsl_buffer_size(fsl_buffer const * const b){ return b->used; } fsl_size_t fsl_buffer_capacity(fsl_buffer const * const b){ return b->capacity; } #endif bool fsl_data_is_compressed(unsigned char const * const mem, fsl_size_t len){ if(!mem || (len<6)) return 0; #if 0 else return ('x'==mem[4]) && (0234==mem[5]); /* This check fails for one particular artifact in the tcl core. Notes gathered while debugging... https://core.tcl.tk/tcl/ Delta manifest #5f37dcc3 while processing file #687 (1-based): FSL_RC_RANGE: "Delta: copy extends past end of input" To reproduce from tcl repo: f-acat 5f37dcc3 | f-mfparse -r More details: Filename: library/encoding/gb2312-raw.enc Content: dba09c670f24d47b95d12d4bb9704391b81dda9a That artifact is a delta of bccc899015b688d5c426bc791c2fcde3a03a3eb5, which is actually two files: library/encoding/euc-cn.enc library/encoding/gb2312.enc When we go to apply the delta, the contents of bccc8 appear to be badly compressed data. They have the 'x' at byte offset 4 but not the 0234 at byte offset 5. Turns out it is the fsl_buffer_is_compressed() impl which fails for that one. */ #else else{ /** Adapted from: https://blog.2of1.org/2011/03/03/decompressing-zlib-images/ Remember that fossil-compressed data has a 4-byte big-endian header holding the uncompressed size of the data, so we skip those first 4 bytes. See also: https://tools.ietf.org/html/rfc6713 search for "magic number". 
*/ int16_t const head = (((int16_t)mem[4]) << 8) | mem[5]; /* MARKER(("isCompressed header=%04x\n", head)); */ switch(head){ case 0x083c: case 0x087a: case 0x08b8: case 0x08f6: case 0x1838: case 0x1876: case 0x18b4: case 0x1872: case 0x2834: case 0x2872: case 0x28b0: case 0x28ee: case 0x3830: case 0x386e: case 0x38ac: case 0x38ea: case 0x482c: case 0x486a: case 0x48a8: case 0x48e6: case 0x5828: case 0x5866: case 0x58a4: case 0x58e2: case 0x6824: case 0x6862: case 0x68bf: case 0x68fd: case 0x7801: case 0x785e: case 0x789c: case 0x78da: return true; default: return false; } } #endif } bool fsl_buffer_is_compressed(fsl_buffer const *buf){ return fsl_data_is_compressed( buf->mem, buf->used ); } fsl_int_t fsl_data_uncompressed_size(unsigned char const *mem, fsl_size_t len){ return fsl_data_is_compressed(mem,len) ? ((mem[0]<<24) + (mem[1]<<16) + (mem[2]<<8) + mem[3]) : -1; } fsl_int_t fsl_buffer_uncompressed_size(fsl_buffer const * b){ return fsl_data_uncompressed_size(b->mem, b->used); } int fsl_buffer_compress(fsl_buffer const *pIn, fsl_buffer *pOut){ unsigned int nIn = pIn->used; unsigned int nOut = 13 + nIn + (nIn+999)/1000; fsl_buffer temp = fsl_buffer_empty; int rc = fsl_buffer_resize(&temp, nOut+4); if(rc) return rc; else{ unsigned long int nOut2; unsigned char *outBuf; unsigned long int outSize; outBuf = temp.mem; outBuf[0] = nIn>>24 & 0xff; outBuf[1] = nIn>>16 & 0xff; outBuf[2] = nIn>>8 & 0xff; outBuf[3] = nIn & 0xff; nOut2 = (long int)nOut; rc = compress(&outBuf[4], &nOut2, pIn->mem, pIn->used); if(rc){ fsl_buffer_clear(&temp); return FSL_RC_ERROR; } outSize = nOut2+4; rc = fsl_buffer_resize(&temp, outSize); if(rc){ fsl_buffer_clear(&temp); }else{ fsl_buffer_swap_free(&temp, pOut, -1); assert(0==temp.used); assert(outSize==pOut->used); } return rc; } } int fsl_buffer_compress2(fsl_buffer const *pIn1, fsl_buffer const *pIn2, fsl_buffer *pOut){ unsigned int nIn = pIn1->used + pIn2->used; unsigned int nOut = 13 + nIn + (nIn+999)/1000; fsl_buffer temp = 
fsl_buffer_empty; int rc; rc = fsl_buffer_resize(&temp, nOut+4); if(rc) return rc; else{ unsigned char *outBuf; z_stream stream; outBuf = temp.mem; outBuf[0] = nIn>>24 & 0xff; outBuf[1] = nIn>>16 & 0xff; outBuf[2] = nIn>>8 & 0xff; outBuf[3] = nIn & 0xff; stream.zalloc = (alloc_func)0; stream.zfree = (free_func)0; stream.opaque = 0; stream.avail_out = nOut; stream.next_out = &outBuf[4]; deflateInit(&stream, 9); stream.avail_in = pIn1->used; stream.next_in = pIn1->mem; deflate(&stream, 0); stream.avail_in = pIn2->used; stream.next_in = pIn2->mem; deflate(&stream, 0); deflate(&stream, Z_FINISH); rc = fsl_buffer_resize(&temp, stream.total_out + 4); deflateEnd(&stream); if(!rc){ temp.used = stream.total_out + 4; if( pOut==pIn1 ) fsl_buffer_reserve(pOut, 0); else if( pOut==pIn2 ) fsl_buffer_reserve(pOut, 0); assert(!pOut->mem); *pOut = temp; }else{ fsl_buffer_reserve(&temp, 0); } return rc; } } int fsl_buffer_uncompress(fsl_buffer const * const pIn, fsl_buffer * const pOut){ unsigned int nOut; unsigned char *inBuf; unsigned int const nIn = pIn->used; fsl_buffer temp = fsl_buffer_empty; int rc; unsigned long int nOut2; if(nIn<=4 || !fsl_data_is_compressed(pIn->mem, pIn->used)){ if(pIn==pOut || !pIn->mem) rc = 0; else{ fsl_buffer_reuse(pOut); rc = fsl_buffer_append(pOut, pIn->mem, pIn->used); } return rc; } inBuf = pIn->mem; nOut = (inBuf[0]<<24) + (inBuf[1]<<16) + (inBuf[2]<<8) + inBuf[3]; /* MARKER(("decompress size: %u\n", nOut)); */ if(pIn!=pOut && pOut->capacity>=nOut+1){ assert(pIn->mem != pOut->mem); #if 0 /* why does this cause corruption (in the form of overwriting a buffer somewhere in the fsl_content_get() constellation)? 
fsl_repo_rebuild() works but fsl_repo_extract() can trigger it: (FSL_RC_RANGE): Delta: copy extends past end of input */ fsl_buffer_external(&temp, pOut->mem, pOut->capacity); #else fsl_buffer_swap(&temp, pOut); #endif }else{ rc = fsl_buffer_reserve(&temp, nOut+1); if(rc) return rc; temp.mem[nOut] = 0; } nOut2 = (long int)nOut; rc = uncompress(temp.mem, &nOut2, &inBuf[4], nIn - 4) /* In some libz versions (<1.2.4, apparently), valgrind says there's an uninitialized memory access somewhere under uncompress(), _presumably_ for one of these arguments, but i can't find it. fsl_buffer_reserve() always memsets() new bytes to 0. Turns out it's a known problem: https://www.zlib.net/zlib_faq.html#faq36 */; switch(rc){ case 0: /* this is always true but having this assert here makes me nervous: assert(nOut2 == nOut); */ assert(nOut2<=nOut); temp.mem[nOut2] = 0; temp.used = (fsl_size_t)nOut2; #if 1 fsl_buffer_swap(&temp, pOut); #else if(temp.mem!=pOut->mem){ if(pOut->capacity>=temp.capacity){ pOut->used = 0; MARKER(("uncompress() re-using target buffer.\n")); fsl_buffer_append(pOut, temp.mem, temp.capacity); }else{ fsl_buffer_swap(pOut, &temp); } } #endif break; case Z_DATA_ERROR: rc = FSL_RC_CONSISTENCY; break; case Z_MEM_ERROR: rc = FSL_RC_OOM; break; case Z_BUF_ERROR: assert(!"Cannot happen!"); rc = FSL_RC_RANGE; break; default: rc = FSL_RC_ERROR; break; } if(temp.mem!=pOut->mem) fsl_buffer_clear(&temp); return rc; } int fsl_buffer_fill_from( fsl_buffer * const dest, fsl_input_f src, void * const state ) { int rc; enum { BufSize = 512 * 8 }; char rbuf[BufSize]; fsl_size_t total = 0; fsl_size_t rlen = 0; if( !dest || ! 
src ) return FSL_RC_MISUSE;
  fsl_buffer_reuse(dest);
  while(1){
    rlen = BufSize;
    /* src is expected to set rlen to the number of bytes it actually
       stored in rbuf. */
    rc = src( state, rbuf, &rlen );
    if( rc ) break;
    total += rlen;
    /* NOTE(review): this copy of the file has lost text inside the
       next two statements (everything between a '<' which was
       directly followed by an identifier and the next '>'). They are
       kept exactly as found and must be restored from the canonical
       source. */
    if(totalcapacity < (total+1) ){
      rc = fsl_buffer_reserve( dest, total + ((rlenmem + dest->used, rbuf, rlen );
    dest->used += rlen;
    /* a short read marks the end of the input */
    if( rlen < BufSize ) break;
  }
  if( !rc && dest->used ){
    assert( dest->used < dest->capacity );
    dest->mem[dest->used] = 0;
  }
  return rc;
}

/**
   Equivalent to fsl_buffer_fill_from() using fsl_input_f_FILE() as
   the input function and src as its state.
*/
int fsl_buffer_fill_from_FILE( fsl_buffer * const dest,
                               FILE * const src ){
  return fsl_buffer_fill_from( dest, fsl_input_f_FILE, src );
}

/**
   Replaces the content of dest with the content of the named file,
   which is opened in binary mode via fsl_fopen(). Returns 0 on
   success, the result of a failed memory reservation, an
   errno-derived code (defaulting to FSL_RC_IO) if the file cannot be
   opened, or the result of fsl_buffer_fill_from().
*/
int fsl_buffer_fill_from_filename( fsl_buffer * const dest,
                                   char const * filename ){
  int rc;
  FILE * src;
  fsl_fstat st = fsl_fstat_empty;
  /* This stat() is only an optimization to reserve all needed
     memory up front.
  */
  rc = fsl_stat( filename, &st, 1 );
  if(!rc && st.size>0){
    rc = fsl_buffer_reserve(dest, st.size +1/*NUL terminator*/);
    if(rc) return rc;
  } /* Else it might not be a real file, e.g. "-", so we'll try anyway... */
  src = fsl_fopen(filename,"rb");
  if(!src) rc = fsl_errno_to_rc(errno, FSL_RC_IO);
  else {
    rc = fsl_buffer_fill_from( dest, fsl_input_f_FILE, src );
    fsl_fclose(src);
  }
  return rc;
}

/**
   Swaps the complete state (memory pointer, sizes, cursor) of left
   and right.
*/
void fsl_buffer_swap( fsl_buffer * left, fsl_buffer * right ){
  fsl_buffer const tmp = *left;
  *left = *right;
  *right = tmp;
}

/**
   Swaps left and right, then frees one of them via
   fsl_buffer_reserve(x, 0): left if clearWhich is negative, right if
   it is positive, neither if it is 0.
*/
void fsl_buffer_swap_free( fsl_buffer * left, fsl_buffer * right,
                           int clearWhich ){
  fsl_buffer_swap(left, right);
  if(0 != clearWhich) fsl_buffer_reserve((clearWhich<0) ? left : right, 0);
}

/**
   Replaces the content of dest with a copy of src's used bytes.
   Returns 0 if src is empty, else the result of
   fsl_buffer_append().
*/
int fsl_buffer_copy( fsl_buffer * const dest,
                     fsl_buffer const * const src ){
  fsl_buffer_reuse(dest);
  return src->used
    ? fsl_buffer_append( dest, src->mem, src->used )
    : 0;
}

/**
   Applies the delta in pDelta to orig and, on success, moves the
   result into pTarget; pTarget's previous memory is then freed. The
   result is built in a temporary buffer, so pTarget is not modified
   on error.

   If fsl_delta_applied_size() fails and pErr is not NULL, pErr is
   populated with a message. pErr is also passed on to
   fsl_delta_apply2(). Returns 0 on success, else the first non-0
   result of the functions called here.
*/
int fsl_buffer_delta_apply2( fsl_buffer const * const orig,
                             fsl_buffer const * const pDelta,
                             fsl_buffer * const pTarget,
                             fsl_error * const pErr){
  int rc;
  fsl_size_t n = 0;
  fsl_buffer out = fsl_buffer_empty;
  rc = fsl_delta_applied_size( pDelta->mem, pDelta->used, &n);
  if(rc){
    if(pErr){
      fsl_error_set(pErr, rc, "fsl_delta_applied_size() failed.");
    }
    return rc;
  }
  rc = fsl_buffer_resize( &out, n );
  if(rc) return rc;
  rc = fsl_delta_apply2( orig->mem, orig->used,
                         pDelta->mem, pDelta->used,
                         out.mem, pErr);
  if(0==rc){
    fsl_buffer_swap(&out, pTarget);
  }
  /* frees either the failed result or pTarget's previous memory */
  fsl_buffer_clear(&out);
  return rc;
}

/**
   Equivalent to fsl_buffer_delta_apply2() with a NULL error object.
*/
int fsl_buffer_delta_apply( fsl_buffer const * const orig,
                            fsl_buffer const * const pDelta,
                            fsl_buffer * const pTarget){
  return fsl_buffer_delta_apply2(orig, pDelta, pTarget, NULL);
}

/**
   Passes b's memory and a pointer to its used size to
   fsl_bytes_defossilize(), which may therefore change both.
*/
void fsl_buffer_defossilize( fsl_buffer * const b ){
  fsl_bytes_defossilize( b->mem, &b->used );
}

/**
   Writes b's used bytes to the named file, which is opened with mode
   "wb". An empty buffer still creates/truncates the file. Returns 0
   on success, FSL_RC_MISUSE if either argument is NULL, an
   errno-derived code (defaulting to FSL_RC_IO) if the file cannot be
   opened, or FSL_RC_IO if the write fails.
*/
int fsl_buffer_to_filename( fsl_buffer const * const b, char const * fname ){
  FILE * f;
  int rc = 0;
  if(!b || !fname) return FSL_RC_MISUSE;
  f = fsl_fopen(fname, "wb");
  if(!f) rc = fsl_errno_to_rc(errno, FSL_RC_IO);
  else{
    if(b->used) {
      /* write everything as a single item so that a short write is
         reported as failure */
      size_t const frc = fwrite(b->mem, b->used, 1, f);
      rc = (1==frc) ? 0 : FSL_RC_IO;
    }
    fsl_fclose(f);
  }
  return rc;
}

/**
   Creates a delta which transforms src into newVers and stores it in
   delta. The three buffers must be distinct, else FSL_RC_MISUSE is
   returned. delta is first given a capacity of at least
   newVers->used+60 bytes. Returns FSL_RC_OOM if that reservation
   fails, else the result of fsl_delta_create().
*/
int fsl_buffer_delta_create( fsl_buffer const * const src,
                             fsl_buffer const * const newVers,
                             fsl_buffer * const delta){
  if((src == newVers)
     || (src==delta)
     || (newVers==delta)) return FSL_RC_MISUSE;
  int rc = fsl_buffer_reserve( delta, newVers->used + 60 );
  if(!rc){
    delta->used = 0;
    rc = fsl_delta_create( src->mem, src->used,
                           newVers->mem, newVers->used,
                           delta->mem, &delta->used );
  }
  return rc;
}

/**
   Output callback which appends n bytes from src to the fsl_buffer
   passed as state. Returns FSL_RC_MISUSE if state or src is NULL,
   else the result of fsl_buffer_append().
*/
int fsl_output_f_buffer( void * state, void const * src, fsl_size_t n ){
  return (!state || !src)
    ? FSL_RC_MISUSE
    : fsl_buffer_append((fsl_buffer*)state, src, n);
}

/**
   Finalizer callback which treats mem as a fsl_buffer, frees its
   memory and resets it to the empty state. The buffer object itself
   is not freed and the state argument is not used. Always returns 0.
*/
int fsl_finalizer_f_buffer( void * state, void * mem ){
  fsl_buffer * b = (fsl_buffer*)mem;
  fsl_buffer_reserve(b, 0);
  *b = fsl_buffer_empty;
  return 0;
}

/**
   Formats timeptr according to format using fsl_strftime() and
   appends the result to b. The formatted result is limited by an
   internal buffer of BufSize bytes. Returns FSL_RC_MISUSE if any
   argument is NULL or format is empty, FSL_RC_RANGE if
   fsl_strftime() returns 0, else the result of fsl_buffer_append().
*/
int fsl_buffer_strftime(fsl_buffer * const b, char const * format,
                        const struct tm *timeptr){
  if(!b || !format || !*format || !timeptr) return FSL_RC_MISUSE;
  else{
    enum {BufSize = 128};
    char buf[BufSize];
    fsl_size_t const len = fsl_strftime(buf, BufSize, format, timeptr);
    return len ? fsl_buffer_append(b, buf, (fsl_int_t)len) : FSL_RC_RANGE;
  }
}

/**
   Works on pFrom's content starting at its cursor; N==0 is a no-op
   which returns 0. On success pFrom's cursor is moved forward.

   NOTE(review): the loop of this function has lost text in this copy
   of the file (see the marker below). Going by the names, it
   presumably passes up to N lines to the fTo callback -- confirm
   against the canonical source.
*/
int fsl_buffer_stream_lines(fsl_output_f fTo, void * const toState,
                            fsl_buffer * const pFrom,
                            fsl_size_t N){
  char *z = (char *)pFrom->mem;
  fsl_size_t i = pFrom->cursor;
  fsl_size_t n = pFrom->used;
  fsl_size_t cnt = 0;
  int rc = 0;
  if( N==0 ) return 0;
  /* NOTE(review): text lost after "while( i"; kept exactly as
     found. */
  while( imem[pFrom->cursor], i - pFrom->cursor);
  }
  if(!rc){
    pFrom->cursor = i;
  }
  return rc;
}

/**
   Equivalent to fsl_buffer_stream_lines() with pTo as the output
   destination. If pTo is NULL then a NULL output function is passed
   on.
*/
int fsl_buffer_copy_lines(fsl_buffer * const pTo,
                          fsl_buffer * const pFrom,
                          fsl_size_t N){
#if 1
  return fsl_buffer_stream_lines( pTo ?
fsl_output_f_buffer : NULL, pTo, pFrom, N ); #else char *z = (char *)pFrom->mem; fsl_size_t i = pFrom->cursor; fsl_size_t n = pFrom->used; fsl_size_t cnt = 0; int rc = 0; if( N==0 ) return 0; while( imem[pFrom->cursor], i - pFrom->cursor); } if(!rc){ pFrom->cursor = i; } return rc; #endif } int fsl_input_f_buffer( void * state, void * dest, fsl_size_t * n ){ fsl_buffer * b = (fsl_buffer*)state; fsl_size_t const from = b->cursor; fsl_size_t to; fsl_size_t c; if(from >= b->used){ *n = 0; return 0; } to = from + *n; if(to>b->used) to = b->used; c = to - from; if(c){ memcpy(dest, b->mem+from, c); b->cursor += c; } *n = c; return 0; } int fsl_buffer_compare_file( fsl_buffer const * b, char const * zFile ){ int rc; fsl_fstat fst = fsl_fstat_empty; rc = fsl_stat(zFile, &fst, 1); if(rc || (FSL_FSTAT_TYPE_FILE != fst.type)) return -1; else if(b->used < fst.size) return -1; else if(b->used > fst.size) return 1; else{ #if 1 FILE * f; f = fsl_fopen(zFile,"r"); if(!f) rc = -1; else{ fsl_buffer fc = *b /* so fsl_input_f_buffer() can manipulate its cursor */; rc = fsl_stream_compare(fsl_input_f_buffer, &fc, fsl_input_f_FILE, f); assert(fc.mem==b->mem); fsl_fclose(f); } #else fsl_buffer fc = fsl_buffer_empty; rc = fsl_buffer_fill_from_filename(&fc, zFile); if(rc){ rc = -1; }else{ rc = fsl_buffer_compare(b, &fc); } fsl_buffer_clear(&fc); #endif return rc; } } char * fsl_buffer_take(fsl_buffer * const b){ char * z = NULL; if(0==buffer_materialize(b,0)){ z = (char *)b->mem; *b = fsl_buffer_empty; } return z; } fsl_size_t fsl_buffer_seek(fsl_buffer * const b, fsl_int_t offset, fsl_buffer_seek_e whence){ int64_t c = (int64_t)b->cursor; switch(whence){ case FSL_BUFFER_SEEK_SET: c = offset; case FSL_BUFFER_SEEK_CUR: c = (int64_t)b->cursor + offset; break; case FSL_BUFFER_SEEK_END: c = (int64_t)b->used + offset; /* ^^^^^ fossil(1) uses (used + offset - 1) but That seems somewhat arguable because (used + 0 - 1) is at the last-written byte (or 1 before the begining), not the 
one-past-the-end point (which corresponds to the "end-of-file" described by the fseek() man page). It then goes on, in other algos, to operate on that final byte using that position, e.g. blob_read() after a seek-to-end would read that last byte, rather than treating the buffer as being at the end. So... i'm going to naively remove that -1 bit. */ break; } if(!b->used || c<0) b->cursor = 0; else if((fsl_size_t)c > b->used) b->cursor = b->used; else b->cursor = (fsl_size_t)c; return b->cursor; } fsl_size_t fsl_buffer_tell(fsl_buffer const * const b){ return b->cursor; } void fsl_buffer_rewind(fsl_buffer * const b){ b->cursor = 0; } int fsl_id_bag_to_buffer(fsl_id_bag const * bag, fsl_buffer * b, char const * separator){ int i = 0; fsl_int_t const sepLen = (fsl_id_t)fsl_strlen(separator); int rc = fsl_buffer_reserve(b, b->used + (bag->entryCount * 7) + (bag->entryCount * sepLen)); for(fsl_id_t e = fsl_id_bag_first(bag); !rc && e; e = fsl_id_bag_next(bag, e)){ if(i++) rc = fsl_buffer_append(b, separator, sepLen); if(!rc) rc = fsl_buffer_appendf(b, "%" FSL_ID_T_PFMT, e); } return rc; } int fsl_buffer_append_tcl_literal(fsl_buffer * const b, char const * z, fsl_int_t n){ int rc; if(n<0) n = fsl_strlen(z); rc = fsl_buffer_append(b, "\"", 1); for(fsl_int_t i=0; 0==rc && i