/*
** Copyright (c) 2006 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the Simplified BSD License (also
** known as the "2-Clause License" or "FreeBSD License".)
**
** This program is distributed in the hope that it will be useful,
** but without any warranty; without even the implied warranty of
** merchantability or fitness for a particular purpose.
**
** Author contact information:
** drh@hwaci.com
** http://www.hwaci.com/drh/
**
*******************************************************************************
**
** A Blob is a variable-length containers for arbitrary string
** or binary data.
*/
#include "config.h"
#if defined(FOSSIL_ENABLE_MINIZ)
# define MINIZ_HEADER_FILE_ONLY
# include "miniz.c"
#else
# include <zlib.h>
#endif
#include "blob.h"
#if defined(_WIN32)
#include <fcntl.h>
#include <io.h>
#endif
#if INTERFACE
/*
** A Blob can hold a string or a binary object of arbitrary size. The
** size changes as necessary.
*/
struct Blob {
unsigned int nUsed; /* Number of bytes used in aData[] */
unsigned int nAlloc; /* Number of bytes allocated for aData[] */
unsigned int iCursor; /* Next character of input to parse */
unsigned int blobFlags; /* One or more BLOBFLAG_* bits */
char *aData; /* Where the information is stored */
void (*xRealloc)(Blob*, unsigned int); /* Function to reallocate the buffer */
};
/*
** Allowed values for Blob.blobFlags
*/
#define BLOBFLAG_NotSQL 0x0001 /* Non-SQL text */
/*
** The current size of a Blob
*/
#define blob_size(X) ((X)->nUsed)
/*
** The buffer holding the blob data
*/
#define blob_buffer(X) ((X)->aData)
/*
** Seek whence parameter values
*/
#define BLOB_SEEK_SET 1
#define BLOB_SEEK_CUR 2
#define BLOB_SEEK_END 3
#endif /* INTERFACE */
/*
** Make sure a blob is initialized
*/
#define blob_is_init(x) \
assert((x)->xRealloc==blobReallocMalloc || (x)->xRealloc==blobReallocStatic)
/*
** Make sure a blob does not contain malloced memory.
**
** This might fail if we are unlucky and x is uninitialized. For that
** reason it should only be used locally for debugging. Leave it turned
** off for production.
*/
#if 0 /* Enable for debugging only */
#define assert_blob_is_reset(x) assert(blob_is_reset(x))
#else
#define assert_blob_is_reset(x)
#endif
/*
** We find that the built-in isspace() function does not work for
** some international character sets. So here is a substitute.
*/
int fossil_isspace(char c){
return c==' ' || (c<='\r' && c>='\t');
}
/*
** Other replacements for ctype.h functions.
*/
int fossil_islower(char c){ return c>='a' && c<='z'; }
int fossil_isupper(char c){ return c>='A' && c<='Z'; }
int fossil_isdigit(char c){ return c>='0' && c<='9'; }
int fossil_tolower(char c){
return fossil_isupper(c) ? c - 'A' + 'a' : c;
}
int fossil_toupper(char c){
return fossil_islower(c) ? c - 'a' + 'A' : c;
}
int fossil_isalpha(char c){
return (c>='a' && c<='z') || (c>='A' && c<='Z');
}
int fossil_isalnum(char c){
return (c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9');
}
/* Return true if and only if the entire string consists of only
** alphanumeric characters.
*/
int fossil_no_strange_characters(const char *z){
while( z && (fossil_isalnum(z[0]) || z[0]=='_' || z[0]=='-') ) z++;
return z[0]==0;
}
/*
** COMMAND: test-isspace
**
** Verify that the fossil_isspace() routine is working correctly by
** testing it on all possible inputs.
*/
void isspace_cmd(void){
int i;
for(i=0; i<=255; i++){
if( i==' ' || i=='\n' || i=='\t' || i=='\v'
|| i=='\f' || i=='\r' ){
assert( fossil_isspace((char)i) );
}else{
assert( !fossil_isspace((char)i) );
}
}
fossil_print("All 256 characters OK\n");
}
/*
** This routine is called if a blob operation fails because we
** have run out of memory.
*/
static void blob_panic(void){
static const char zErrMsg[] = "out of memory\n";
fputs(zErrMsg, stderr);
fossil_exit(1);
}
/*
** A reallocation function that assumes that aData came from malloc().
** This function attempts to resize the buffer of the blob to hold
** newSize bytes.
**
** No attempt is made to recover from an out-of-memory error.
** If an OOM error occurs, an error message is printed on stderr
** and the program exits.
*/
void blobReallocMalloc(Blob *pBlob, unsigned int newSize){
if( newSize==0 ){
free(pBlob->aData);
pBlob->aData = 0;
pBlob->nAlloc = 0;
pBlob->nUsed = 0;
pBlob->iCursor = 0;
pBlob->blobFlags = 0;
}else if( newSize>pBlob->nAlloc || newSize<pBlob->nAlloc-4000 ){
char *pNew = fossil_realloc(pBlob->aData, newSize);
pBlob->aData = pNew;
pBlob->nAlloc = newSize;
if( pBlob->nUsed>pBlob->nAlloc ){
pBlob->nUsed = pBlob->nAlloc;
}
}
}
/*
** An initializer for Blobs
*/
#if INTERFACE
#define BLOB_INITIALIZER {0,0,0,0,0,blobReallocMalloc}
#endif
const Blob empty_blob = BLOB_INITIALIZER;
/*
** A reallocation function for when the initial string is in unmanaged
** space. Copy the string to memory obtained from malloc().
*/
static void blobReallocStatic(Blob *pBlob, unsigned int newSize){
if( newSize==0 ){
*pBlob = empty_blob;
}else{
char *pNew = fossil_malloc( newSize );
if( pBlob->nUsed>newSize ) pBlob->nUsed = newSize;
memcpy(pNew, pBlob->aData, pBlob->nUsed);
pBlob->aData = pNew;
pBlob->xRealloc = blobReallocMalloc;
pBlob->nAlloc = newSize;
}
}
/*
** Reset a blob to be an empty container.
*/
void blob_reset(Blob *pBlob){
blob_is_init(pBlob);
pBlob->xRealloc(pBlob, 0);
}
/*
** Return true if the blob has been zeroed - in other words if it contains
** no malloced memory. This only works reliably if the blob has been
** initialized - it can return a false negative on an uninitialized blob.
*/
int blob_is_reset(Blob *pBlob){
if( pBlob==0 ) return 1;
if( pBlob->nUsed ) return 0;
if( pBlob->xRealloc==blobReallocMalloc && pBlob->nAlloc ) return 0;
return 1;
}
/*
** Initialize a blob to a string or byte-array constant of a specified length.
** Any prior data in the blob is discarded.
*/
void blob_init(Blob *pBlob, const char *zData, int size){
assert_blob_is_reset(pBlob);
if( zData==0 ){
*pBlob = empty_blob;
}else{
if( size<=0 ) size = strlen(zData);
pBlob->nUsed = pBlob->nAlloc = size;
pBlob->aData = (char*)zData;
pBlob->iCursor = 0;
pBlob->blobFlags = 0;
pBlob->xRealloc = blobReallocStatic;
}
}
/*
** Initialize a blob to a nul-terminated string.
** Any prior data in the blob is discarded.
*/
void blob_set(Blob *pBlob, const char *zStr){
blob_init(pBlob, zStr, -1);
}
/*
** Initialize a blob to a nul-terminated string obtained from fossil_malloc().
** The blob will take responsibility for freeing the string.
*/
void blob_set_dynamic(Blob *pBlob, char *zStr){
blob_init(pBlob, zStr, -1);
pBlob->xRealloc = blobReallocMalloc;
}
/*
** Initialize a blob to an empty string.
*/
void blob_zero(Blob *pBlob){
static const char zEmpty[] = "";
assert_blob_is_reset(pBlob);
pBlob->nUsed = 0;
pBlob->nAlloc = 1;
pBlob->aData = (char*)zEmpty;
pBlob->iCursor = 0;
pBlob->blobFlags = 0;
pBlob->xRealloc = blobReallocStatic;
}
/*
** Append text or data to the end of a blob.
**
** The blob_append_full() routine is a complete implementation.
** The blob_append() routine only works for cases where nData>0 and
** no resizing is required, and falls back to blob_append_full() if
** either condition is not met, but runs faster in the common case
** where all conditions are met. The use of blob_append() is
** recommended, unless it is known in advance that nData<0.
*/
void blob_append_full(Blob *pBlob, const char *aData, int nData){
sqlite3_int64 nNew;
assert( aData!=0 || nData==0 );
blob_is_init(pBlob);
if( nData<0 ) nData = strlen(aData);
if( nData==0 ) return;
nNew = pBlob->nUsed;
nNew += nData;
if( nNew >= pBlob->nAlloc ){
nNew += pBlob->nAlloc;
nNew += 100;
if( nNew>=0x7fff0000 ){
blob_panic();
}
pBlob->xRealloc(pBlob, (int)nNew);
if( pBlob->nUsed + nData >= pBlob->nAlloc ){
blob_panic();
}
}
memcpy(&pBlob->aData[pBlob->nUsed], aData, nData);
pBlob->nUsed += nData;
pBlob->aData[pBlob->nUsed] = 0; /* Blobs are always nul-terminated */
}
void blob_append(Blob *pBlob, const char *aData, int nData){
sqlite3_int64 nUsed;
assert( aData!=0 || nData==0 );
/* blob_is_init(pBlob); // omitted for speed */
if( nData<=0 || pBlob->nUsed + nData >= pBlob->nAlloc ){
blob_append_full(pBlob, aData, nData);
return;
}
nUsed = pBlob->nUsed;
pBlob->nUsed += nData;
pBlob->aData[pBlob->nUsed] = 0;
memcpy(&pBlob->aData[nUsed], aData, nData);
}
/*
** Append a string literal to a blob.
*/
#if INTERFACE
#define blob_append_string(BLOB,STR) blob_append(BLOB,STR,sizeof(STR)-1)
#endif
/*
** Append a single character to the blob
*/
void blob_append_char(Blob *pBlob, char c){
if( pBlob->nUsed+1 >= pBlob->nAlloc ){
blob_append_full(pBlob, &c, 1);
}else{
pBlob->aData[pBlob->nUsed++] = c;
}
}
/*
** Copy a blob
*/
void blob_copy(Blob *pTo, Blob *pFrom){
blob_is_init(pFrom);
blob_zero(pTo);
blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
}
/*
** Return a pointer to a null-terminated string for a blob.
*/
char *blob_str(Blob *p){
blob_is_init(p);
if( p->nUsed==0 ){
blob_append_char(p, 0); /* NOTE: Changes nUsed. */
p->nUsed = 0;
}
if( p->nUsed<p->nAlloc ){
p->aData[p->nUsed] = 0;
}else{
blob_materialize(p);
}
return p->aData;
}
/*
** Return a pointer to a null-terminated string for a blob that has
** been created using blob_append_sql() and not blob_appendf(). If
** text was ever added using blob_appendf() then throw an error.
*/
char *blob_sql_text(Blob *p){
blob_is_init(p);
if( (p->blobFlags & BLOBFLAG_NotSQL) ){
fossil_panic("use of blob_appendf() to construct SQL text");
}
return blob_str(p);
}
/*
** Return a pointer to a null-terminated string for a blob.
**
** WARNING: If the blob is ephemeral, it might cause a '\000'
** character to be inserted into the middle of the parent blob.
** Example: Suppose p is a token extracted from some larger
** blob pBig using blob_token(). If you call this routine on p,
** then a '\000' character will be inserted in the middle of
** pBig in order to cause p to be nul-terminated. If pBig
** should not be modified, then use blob_str() instead of this
** routine. blob_str() will make a copy of the p if necessary
** to avoid modifying pBig.
*/
char *blob_terminate(Blob *p){
blob_is_init(p);
if( p->nUsed==0 ) return "";
p->aData[p->nUsed] = 0;
return p->aData;
}
/*
** Compare two blobs. Return negative, zero, or positive if the first
** blob is less then, equal to, or greater than the second.
*/
int blob_compare(Blob *pA, Blob *pB){
int szA, szB, sz, rc;
blob_is_init(pA);
blob_is_init(pB);
szA = blob_size(pA);
szB = blob_size(pB);
sz = szA<szB ? szA : szB;
rc = memcmp(blob_buffer(pA), blob_buffer(pB), sz);
if( rc==0 ){
rc = szA - szB;
}
return rc;
}
/*
** Compare two blobs in constant time and return zero if they are equal.
** Constant time comparison only applies for blobs of the same length.
** If lengths are different, immediately returns 1.
*/
int blob_constant_time_cmp(Blob *pA, Blob *pB){
int szA, szB, i;
unsigned char *buf1, *buf2;
unsigned char rc = 0;
blob_is_init(pA);
blob_is_init(pB);
szA = blob_size(pA);
szB = blob_size(pB);
if( szA!=szB || szA==0 ) return 1;
buf1 = (unsigned char*)blob_buffer(pA);
buf2 = (unsigned char*)blob_buffer(pB);
for( i=0; i<szA; i++ ){
rc = rc | (buf1[i] ^ buf2[i]);
}
return rc;
}
/*
** Compare a blob to a string. Return TRUE if they are equal.
*/
int blob_eq_str(Blob *pBlob, const char *z, int n){
Blob t;
blob_is_init(pBlob);
if( n<=0 ) n = strlen(z);
t.aData = (char*)z;
t.nUsed = n;
t.xRealloc = blobReallocStatic;
return blob_compare(pBlob, &t)==0;
}
/*
** This macro compares a blob against a string constant. We use the sizeof()
** operator on the string constant twice, so it really does need to be a
** string literal or character array - not a character pointer.
*/
#if INTERFACE
# define blob_eq(B,S) \
((B)->nUsed==sizeof(S)-1 && memcmp((B)->aData,S,sizeof(S)-1)==0)
#endif
/*
** Attempt to resize a blob so that its internal buffer is
** nByte in size. The blob is truncated if necessary.
*/
void blob_resize(Blob *pBlob, unsigned int newSize){
pBlob->xRealloc(pBlob, newSize+1);
pBlob->nUsed = newSize;
pBlob->aData[newSize] = 0;
}
/*
** Ensures that the given blob has at least the given amount of memory
** allocated to it. Does not modify pBlob->nUsed nor will it reduce
** the currently-allocated amount of memory.
**
** For semantic compatibility with blob_append_full(), if newSize is
** >=0x7fff000 (~2GB) then this function will trigger blob_panic(). If
** it didn't, it would be possible to bypass that hard-coded limit via
** this function.
**
** We've had at least one report:
** https://fossil-scm.org/forum/forumpost/b7bbd28db4
** which implies that this is unconditionally failing on mingw 32-bit
** builds.
*/
void blob_reserve(Blob *pBlob, unsigned int newSize){
if(newSize>=0x7fff0000 ){
blob_panic();
}else if(newSize>pBlob->nUsed){
pBlob->xRealloc(pBlob, newSize);
pBlob->aData[newSize] = 0;
}
}
/*
** Make sure a blob is nul-terminated and is not a pointer to unmanaged
** space. Return a pointer to the data.
*/
char *blob_materialize(Blob *pBlob){
blob_resize(pBlob, pBlob->nUsed);
return pBlob->aData;
}
/*
** Call dehttpize on a blob. This causes an ephemeral blob to be
** materialized.
*/
void blob_dehttpize(Blob *pBlob){
blob_materialize(pBlob);
pBlob->nUsed = dehttpize(pBlob->aData);
}
/*
** Extract N bytes from blob pFrom and use it to initialize blob pTo.
** Return the actual number of bytes extracted.
**
** After this call completes, pTo will be an ephemeral blob.
*/
int blob_extract(Blob *pFrom, int N, Blob *pTo){
blob_is_init(pFrom);
assert_blob_is_reset(pTo);
if( pFrom->iCursor + N > pFrom->nUsed ){
N = pFrom->nUsed - pFrom->iCursor;
if( N<=0 ){
blob_zero(pTo);
return 0;
}
}
pTo->nUsed = N;
pTo->nAlloc = N;
pTo->aData = &pFrom->aData[pFrom->iCursor];
pTo->iCursor = 0;
pTo->xRealloc = blobReallocStatic;
pFrom->iCursor += N;
return N;
}
/*
** Rewind the cursor on a blob back to the beginning.
*/
void blob_rewind(Blob *p){
p->iCursor = 0;
}
/*
** Truncate a blob back to zero length
*/
void blob_truncate(Blob *p, int sz){
if( sz>=0 && sz<p->nUsed ) p->nUsed = sz;
}
/*
** Seek the cursor in a blob to the indicated offset.
*/
int blob_seek(Blob *p, int offset, int whence){
if( whence==BLOB_SEEK_SET ){
p->iCursor = offset;
}else if( whence==BLOB_SEEK_CUR ){
p->iCursor += offset;
}else if( whence==BLOB_SEEK_END ){
p->iCursor = p->nUsed + offset - 1;
}
if( p->iCursor>p->nUsed ){
p->iCursor = p->nUsed;
}
return p->iCursor;
}
/*
** Return the current offset into the blob
*/
int blob_tell(Blob *p){
return p->iCursor;
}
/*
** Extract a single line of text from pFrom beginning at the current
** cursor location and use that line of text to initialize pTo.
** pTo will include the terminating \n. Return the number of bytes
** in the line including the \n at the end. 0 is returned at
** end-of-file.
**
** The cursor of pFrom is left pointing at the first byte past the
** \n that terminated the line.
**
** pTo will be an ephermeral blob. If pFrom changes, it might alter
** pTo as well.
*/
int blob_line(Blob *pFrom, Blob *pTo){
char *aData = pFrom->aData;
int n = pFrom->nUsed;
int i = pFrom->iCursor;
while( i<n && aData[i]!='\n' ){ i++; }
if( i<n ){
assert( aData[i]=='\n' );
i++;
}
blob_extract(pFrom, i-pFrom->iCursor, pTo);
return pTo->nUsed;
}
/*
** Trim whitespace off of the end of a blob. Return the number
** of characters remaining.
**
** All this does is reduce the length counter. This routine does
** not insert a new zero terminator.
*/
int blob_trim(Blob *p){
char *z = p->aData;
int n = p->nUsed;
while( n>0 && fossil_isspace(z[n-1]) ){ n--; }
p->nUsed = n;
return n;
}
/*
** Extract a single token from pFrom and use it to initialize pTo.
** Return the number of bytes in the token. If no token is found,
** return 0.
**
** A token consists of one or more non-space characters. Leading
** whitespace is ignored.
**
** The cursor of pFrom is left pointing at the first character past
** the end of the token.
**
** pTo will be an ephermeral blob. If pFrom changes, it might alter
** pTo as well.
*/
int blob_token(Blob *pFrom, Blob *pTo){
char *aData = pFrom->aData;
int n = pFrom->nUsed;
int i = pFrom->iCursor;
while( i<n && fossil_isspace(aData[i]) ){ i++; }
pFrom->iCursor = i;
while( i<n && !fossil_isspace(aData[i]) ){ i++; }
blob_extract(pFrom, i-pFrom->iCursor, pTo);
while( i<n && fossil_isspace(aData[i]) ){ i++; }
pFrom->iCursor = i;
return pTo->nUsed;
}
/*
** Extract a single SQL token from pFrom and use it to initialize pTo.
** Return the number of bytes in the token. If no token is found,
** return 0.
**
** An SQL token consists of one or more non-space characters. If the
** first character is ' then the token is terminated by a matching '
** (ignoring double '') or by the end of the string
**
** The cursor of pFrom is left pointing at the first character past
** the end of the token.
**
** pTo will be an ephermeral blob. If pFrom changes, it might alter
** pTo as well.
*/
int blob_sqltoken(Blob *pFrom, Blob *pTo){
char *aData = pFrom->aData;
int n = pFrom->nUsed;
int i = pFrom->iCursor;
while( i<n && fossil_isspace(aData[i]) ){ i++; }
pFrom->iCursor = i;
if( aData[i]=='\'' ){
i++;
while( i<n ){
if( aData[i]=='\'' ){
if( aData[++i]!='\'' ) break;
}
i++;
}
}else{
while( i<n && !fossil_isspace(aData[i]) ){ i++; }
}
blob_extract(pFrom, i-pFrom->iCursor, pTo);
while( i<n && fossil_isspace(aData[i]) ){ i++; }
pFrom->iCursor = i;
return pTo->nUsed;
}
/*
** Extract everything from the current cursor to the end of the blob
** into a new blob. The new blob is an ephemerial reference to the
** original blob. The cursor of the original blob is unchanged.
*/
int blob_tail(Blob *pFrom, Blob *pTo){
int iCursor = pFrom->iCursor;
blob_extract(pFrom, pFrom->nUsed-pFrom->iCursor, pTo);
pFrom->iCursor = iCursor;
return pTo->nUsed;
}
/*
** Copy N lines of text from pFrom into pTo. The copy begins at the
** current cursor position of pIn. The pIn cursor is left pointing
** at the first character past the last \n copied.
**
** If pTo==NULL then this routine simply skips over N lines.
*/
void blob_copy_lines(Blob *pTo, Blob *pFrom, int N){
char *z = pFrom->aData;
int i = pFrom->iCursor;
int n = pFrom->nUsed;
int cnt = 0;
if( N==0 ) return;
while( i<n ){
if( z[i]=='\n' ){
cnt++;
if( cnt==N ){
i++;
break;
}
}
i++;
}
if( pTo ){
blob_append(pTo, &pFrom->aData[pFrom->iCursor], i - pFrom->iCursor);
}
pFrom->iCursor = i;
}
/*
** Ensure that the text in pBlob ends with '\n'
*/
void blob_add_final_newline(Blob *pBlob){
if( pBlob->nUsed<=0 ) return;
if( pBlob->aData[pBlob->nUsed-1]!='\n' ){
blob_append_char(pBlob, '\n');
}
}
/*
** Return true if the blob contains a valid base16 identifier artifact hash.
**
** The value returned is actually one of HNAME_SHA1 OR HNAME_K256 if the
** hash is valid. Both of these are non-zero and therefore "true".
** If the hash is not valid, then HNAME_ERROR is returned, which is zero or
** false.
*/
int blob_is_hname(Blob *pBlob){
return hname_validate(blob_buffer(pBlob), blob_size(pBlob));
}
/*
** Return true if the blob contains a valid filename
*/
int blob_is_filename(Blob *pBlob){
return file_is_simple_pathname(blob_str(pBlob), 1);
}
/*
** Return true if the blob contains a valid 32-bit integer. Store
** the integer value in *pValue.
*/
int blob_is_int(Blob *pBlob, int *pValue){
const char *z = blob_buffer(pBlob);
int i, n, c, v;
n = blob_size(pBlob);
v = 0;
for(i=0; i<n && (c = z[i])!=0 && c>='0' && c<='9'; i++){
v = v*10 + c - '0';
}
if( i==n ){
*pValue = v;
return 1;
}else{
return 0;
}
}
/*
** Return true if the blob contains a valid 64-bit integer. Store
** the integer value in *pValue.
*/
int blob_is_int64(Blob *pBlob, sqlite3_int64 *pValue){
const char *z = blob_buffer(pBlob);
int i, n, c;
sqlite3_int64 v;
n = blob_size(pBlob);
v = 0;
for(i=0; i<n && (c = z[i])!=0 && c>='0' && c<='9'; i++){
v = v*10 + c - '0';
}
if( i==n ){
*pValue = v;
return 1;
}else{
return 0;
}
}
/*
** Zero or reset an array of Blobs.
*/
void blobarray_zero(Blob *aBlob, int n){
int i;
for(i=0; i<n; i++) blob_zero(&aBlob[i]);
}
void blobarray_reset(Blob *aBlob, int n){
int i;
for(i=0; i<n; i++) blob_reset(&aBlob[i]);
}
/*
** Parse a blob into space-separated tokens. Store each token in
** an element of the blobarray aToken[]. aToken[] is nToken elements in
** size. Return the number of tokens seen.
*/
int blob_tokenize(Blob *pIn, Blob *aToken, int nToken){
int i;
for(i=0; i<nToken && blob_token(pIn, &aToken[i]); i++){}
return i;
}
/*
** Do printf-style string rendering and append the results to a blob.
**
** The blob_appendf() version sets the BLOBFLAG_NotSQL bit in Blob.blobFlags
** whereas blob_append_sql() does not.
*/
void blob_appendf(Blob *pBlob, const char *zFormat, ...){
if( pBlob ){
va_list ap;
va_start(ap, zFormat);
vxprintf(pBlob, zFormat, ap);
va_end(ap);
pBlob->blobFlags |= BLOBFLAG_NotSQL;
}
}
void blob_append_sql(Blob *pBlob, const char *zFormat, ...){
if( pBlob ){
va_list ap;
va_start(ap, zFormat);
vxprintf(pBlob, zFormat, ap);
va_end(ap);
}
}
void blob_vappendf(Blob *pBlob, const char *zFormat, va_list ap){
if( pBlob ) vxprintf(pBlob, zFormat, ap);
}
/*
** Initialize a blob to the data on an input channel. Return
** the number of bytes read into the blob. Any prior content
** of the blob is discarded, not freed.
*/
int blob_read_from_channel(Blob *pBlob, FILE *in, int nToRead){
size_t n;
blob_zero(pBlob);
if( nToRead<0 ){
char zBuf[10000];
while( !feof(in) ){
n = fread(zBuf, 1, sizeof(zBuf), in);
if( n>0 ){
blob_append(pBlob, zBuf, n);
}
}
}else{
blob_resize(pBlob, nToRead);
n = fread(blob_buffer(pBlob), 1, nToRead, in);
blob_resize(pBlob, n);
}
return blob_size(pBlob);
}
/*
** Initialize a blob to be the content of a file. If the filename
** is blank or "-" then read from standard input.
**
** If zFilename is a symbolic link, behavior depends on the eFType
** parameter:
**
** * If eFType is ExtFILE or allow-symlinks is OFF, then the
** pBlob is initialized to the *content* of the object to which
** the zFilename symlink points.
**
** * If eFType is RepoFILE and allow-symlinks is ON, then the
** pBlob is initialized to the *name* of the object to which
** the zFilename symlink points.
**
** Any prior content of the blob is discarded, not freed.
**
** Return the number of bytes read. Calls fossil_fatal() on error (i.e.
** it exit()s and does not return).
*/
sqlite3_int64 blob_read_from_file(
Blob *pBlob, /* The blob to be initialized */
const char *zFilename, /* Extract content from this file */
int eFType /* ExtFILE or RepoFILE - see above */
){
sqlite3_int64 size, got;
FILE *in;
if( zFilename==0 || zFilename[0]==0
|| (zFilename[0]=='-' && zFilename[1]==0) ){
return blob_read_from_channel(pBlob, stdin, -1);
}
if( file_islink(zFilename) ){
return blob_read_link(pBlob, zFilename);
}
size = file_size(zFilename, eFType);
blob_zero(pBlob);
if( size<0 ){
fossil_fatal("no such file: %s", zFilename);
}
if( size==0 ){
return 0;
}
blob_resize(pBlob, size);
in = fossil_fopen(zFilename, "rb");
if( in==0 ){
fossil_fatal("cannot open %s for reading", zFilename);
}
got = fread(blob_buffer(pBlob), 1, size, in);
fclose(in);
if( got<size ){
blob_resize(pBlob, got);
}
return got;
}
/*
** Reads symlink destination path and puts int into blob.
** Any prior content of the blob is discarded, not freed.
**
** Returns length of destination path.
**
** On windows, zeros blob and returns 0.
*/
int blob_read_link(Blob *pBlob, const char *zFilename){
#if !defined(_WIN32)
char zBuf[1024];
ssize_t len = readlink(zFilename, zBuf, 1023);
if( len < 0 ){
fossil_fatal("cannot read symbolic link %s", zFilename);
}
zBuf[len] = 0; /* null-terminate */
blob_zero(pBlob);
blob_appendf(pBlob, "%s", zBuf);
return len;
#else
blob_zero(pBlob);
return 0;
#endif
}
/*
** Write the content of a blob into a file.
**
** If the filename is blank or "-" then write to standard output.
**
** This routine always assumes ExtFILE. If zFilename is a symbolic link
** then the content is written into the object that symbolic link points
** to, not into the symbolic link itself. This is true regardless of
** the allow-symlinks setting.
**
** Return the number of bytes written.
*/
int blob_write_to_file(Blob *pBlob, const char *zFilename){
FILE *out;
int nWrote;
if( zFilename[0]==0 || (zFilename[0]=='-' && zFilename[1]==0) ){
blob_is_init(pBlob);
#if defined(_WIN32)
nWrote = fossil_utf8_to_console(blob_buffer(pBlob), blob_size(pBlob), 0);
if( nWrote>=0 ) return nWrote;
fflush(stdout);
_setmode(_fileno(stdout), _O_BINARY);
#endif
nWrote = fwrite(blob_buffer(pBlob), 1, blob_size(pBlob), stdout);
#if defined(_WIN32)
fflush(stdout);
_setmode(_fileno(stdout), _O_TEXT);
#endif
}else{
file_mkfolder(zFilename, ExtFILE, 1, 0);
out = fossil_fopen(zFilename, "wb");
if( out==0 ){
#if _WIN32
const char *zReserved = file_is_win_reserved(zFilename);
if( zReserved ){
fossil_fatal("cannot open \"%s\" because \"%s\" is "
"a reserved name on Windows", zFilename, zReserved);
}
#endif
fossil_fatal_recursive("unable to open file \"%s\" for writing",
zFilename);
return 0;
}
blob_is_init(pBlob);
nWrote = fwrite(blob_buffer(pBlob), 1, blob_size(pBlob), out);
fclose(out);
if( nWrote!=blob_size(pBlob) ){
fossil_fatal_recursive("short write: %d of %d bytes to %s", nWrote,
blob_size(pBlob), zFilename);
}
}
return nWrote;
}
/*
** Compress a blob pIn. Store the result in pOut. It is ok for pIn and
** pOut to be the same blob.
**
** pOut must either be the same as pIn or else uninitialized.
*/
void blob_compress(Blob *pIn, Blob *pOut){
unsigned int nIn = blob_size(pIn);
unsigned int nOut = 13 + nIn + (nIn+999)/1000;
unsigned long int nOut2;
unsigned char *outBuf;
Blob temp;
blob_zero(&temp);
blob_resize(&temp, nOut+4);
outBuf = (unsigned char*)blob_buffer(&temp);
outBuf[0] = nIn>>24 & 0xff;
outBuf[1] = nIn>>16 & 0xff;
outBuf[2] = nIn>>8 & 0xff;
outBuf[3] = nIn & 0xff;
nOut2 = (long int)nOut;
compress(&outBuf[4], &nOut2,
(unsigned char*)blob_buffer(pIn), blob_size(pIn));
if( pOut==pIn ) blob_reset(pOut);
assert_blob_is_reset(pOut);
*pOut = temp;
blob_resize(pOut, nOut2+4);
}
/*
** COMMAND: test-compress
**
** Usage: %fossil test-compress INPUTFILE OUTPUTFILE
**
** Run compression on INPUTFILE and write the result into OUTPUTFILE.
**
** This is used to test and debug the blob_compress() routine.
*/
void compress_cmd(void){
Blob f;
if( g.argc!=4 ) usage("INPUTFILE OUTPUTFILE");
blob_read_from_file(&f, g.argv[2], ExtFILE);
blob_compress(&f, &f);
blob_write_to_file(&f, g.argv[3]);
}
/*
** Compress the concatenation of a blobs pIn1 and pIn2. Store the result
** in pOut.
**
** pOut must be either uninitialized or must be the same as either pIn1 or
** pIn2.
*/
void blob_compress2(Blob *pIn1, Blob *pIn2, Blob *pOut){
unsigned int nIn = blob_size(pIn1) + blob_size(pIn2);
unsigned int nOut = 13 + nIn + (nIn+999)/1000;
unsigned char *outBuf;
z_stream stream;
Blob temp;
blob_zero(&temp);
blob_resize(&temp, nOut+4);
outBuf = (unsigned char*)blob_buffer(&temp);
outBuf[0] = nIn>>24 & 0xff;
outBuf[1] = nIn>>16 & 0xff;
outBuf[2] = nIn>>8 & 0xff;
outBuf[3] = nIn & 0xff;
stream.zalloc = (alloc_func)0;
stream.zfree = (free_func)0;
stream.opaque = 0;
stream.avail_out = nOut;
stream.next_out = &outBuf[4];
deflateInit(&stream, 9);
stream.avail_in = blob_size(pIn1);
stream.next_in = (unsigned char*)blob_buffer(pIn1);
deflate(&stream, 0);
stream.avail_in = blob_size(pIn2);
stream.next_in = (unsigned char*)blob_buffer(pIn2);
deflate(&stream, 0);
deflate(&stream, Z_FINISH);
blob_resize(&temp, stream.total_out + 4);
deflateEnd(&stream);
if( pOut==pIn1 ) blob_reset(pOut);
if( pOut==pIn2 ) blob_reset(pOut);
assert_blob_is_reset(pOut);
*pOut = temp;
}
/*
** COMMAND: test-compress-2
**
** Usage: %fossil test-compress-2 IN1 IN2 OUT
**
** Read files IN1 and IN2, concatenate the content, compress the
** content, then write results into OUT.
**
** This is used to test and debug the blob_compress2() routine.
*/
void compress2_cmd(void){
Blob f1, f2;
if( g.argc!=5 ) usage("INPUTFILE1 INPUTFILE2 OUTPUTFILE");
blob_read_from_file(&f1, g.argv[2], ExtFILE);
blob_read_from_file(&f2, g.argv[3], ExtFILE);
blob_compress2(&f1, &f2, &f1);
blob_write_to_file(&f1, g.argv[4]);
}
/*
** Uncompress blob pIn and store the result in pOut. It is ok for pIn and
** pOut to be the same blob.
**
** pOut must be either uninitialized or the same as pIn.
*/
int blob_uncompress(Blob *pIn, Blob *pOut){
unsigned int nOut;
unsigned char *inBuf;
unsigned int nIn = blob_size(pIn);
Blob temp;
int rc;
unsigned long int nOut2;
if( nIn<=4 ){
return 0;
}
inBuf = (unsigned char*)blob_buffer(pIn);
nOut = (inBuf[0]<<24) + (inBuf[1]<<16) + (inBuf[2]<<8) + inBuf[3];
blob_zero(&temp);
blob_resize(&temp, nOut+1);
nOut2 = (long int)nOut;
rc = uncompress((unsigned char*)blob_buffer(&temp), &nOut2,
&inBuf[4], nIn - 4);
if( rc!=Z_OK ){
blob_reset(&temp);
return 1;
}
blob_resize(&temp, nOut2);
if( pOut==pIn ) blob_reset(pOut);
assert_blob_is_reset(pOut);
*pOut = temp;
return 0;
}
/*
** COMMAND: test-uncompress
**
** Usage: %fossil test-uncompress IN OUT
**
** Read the content of file IN, uncompress that content, and write the
** result into OUT. This command is intended for testing of the
** blob_compress() function.
*/
void uncompress_cmd(void){
Blob f;
if( g.argc!=4 ) usage("INPUTFILE OUTPUTFILE");
blob_read_from_file(&f, g.argv[2], ExtFILE);
blob_uncompress(&f, &f);
blob_write_to_file(&f, g.argv[3]);
}
/*
** COMMAND: test-cycle-compress
**
** Compress and uncompress each file named on the command line.
** Verify that the original content is recovered.
*/
void test_cycle_compress(void){
int i;
Blob b1, b2, b3;
for(i=2; i<g.argc; i++){
blob_read_from_file(&b1, g.argv[i], ExtFILE);
blob_compress(&b1, &b2);
blob_uncompress(&b2, &b3);
if( blob_compare(&b1, &b3) ){
fossil_fatal("compress/uncompress cycle failed for %s", g.argv[i]);
}
blob_reset(&b1);
blob_reset(&b2);
blob_reset(&b3);
}
fossil_print("ok\n");
}
/*
** Convert every \n character in the given blob into \r\n.
*/
void blob_add_cr(Blob *p){
char *z = p->aData;
int j = p->nUsed;
int i, n;
for(i=n=0; i<j; i++){
if( z[i]=='\n' ) n++;
}
j += n;
if( j>=p->nAlloc ){
blob_resize(p, j);
z = p->aData;
}
p->nUsed = j;
z[j] = 0;
while( j>i ){
if( (z[--j] = z[--i]) =='\n' ){
z[--j] = '\r';
}
}
}
/*
** Remove every \r character from the given blob, replacing each one with
** a \n character if it was not already part of a \r\n pair.
*/
void blob_to_lf_only(Blob *p){
int i, j;
char *z = blob_materialize(p);
for(i=j=0; z[i]; i++){
if( z[i]!='\r' ) z[j++] = z[i];
else if( z[i+1]!='\n' ) z[j++] = '\n';
}
z[j] = 0;
p->nUsed = j;
}
/*
** Convert blob from cp1252 to UTF-8. As cp1252 is a superset
** of iso8859-1, this is useful on UNIX as well.
**
** This table contains the character translations for 0x80..0xA0.
*/
static const unsigned short cp1252[32] = {
0x20ac, 0x81, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x8D, 0x017D, 0x8F,
0x90, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
0x2DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x9D, 0x017E, 0x0178
};
void blob_cp1252_to_utf8(Blob *p){
unsigned char *z = (unsigned char *)p->aData;
int j = p->nUsed;
int i, n;
for(i=n=0; i<j; i++){
if( z[i]>=0x80 ){
if( (z[i]<0xa0) && (cp1252[z[i]&0x1f]>=0x800) ){
n++;
}
n++;
}
}
j += n;
if( j>=p->nAlloc ){
blob_resize(p, j);
z = (unsigned char *)p->aData;
}
p->nUsed = j;
z[j] = 0;
while( j>i ){
if( z[--i]>=0x80 ){
if( z[i]<0xa0 ){
unsigned short sym = cp1252[z[i]&0x1f];
if( sym>=0x800 ){
z[--j] = 0x80 | (sym&0x3f);
z[--j] = 0x80 | ((sym>>6)&0x3f);
z[--j] = 0xe0 | (sym>>12);
}else{
z[--j] = 0x80 | (sym&0x3f);
z[--j] = 0xc0 | (sym>>6);
}
}else{
z[--j] = 0x80 | (z[i]&0x3f);
z[--j] = 0xC0 | (z[i]>>6);
}
}else{
z[--j] = z[i];
}
}
}
/*
** pBlob is a shell command under construction. This routine safely
** appends argument zIn.
**
** The argument is escaped if it contains white space or other characters
** that need to be escaped for the shell. If zIn contains characters
** that cannot be safely escaped, then throw a fatal error.
**
** The argument is expected to a filename of some kinds. As shell commands
** commonly have command-line options that begin with "-" and since we
** do not want an attacker to be able to invoke these switches using
** filenames that begin with "-", if zIn begins with "-", prepend
** an additional "./".
*/
void blob_append_escaped_arg(Blob *pBlob, const char *zIn){
int i;
char c;
int needEscape = 0;
int n = blob_size(pBlob);
char *z = blob_buffer(pBlob);
#if defined(_WIN32)
const char cDirSep = '\\'; /* Use \ as directory separator */
const char cQuote = '"'; /* Use "..." quoting on windows */
const char cEscape = '^'; /* Use ^X escaping on windows */
#else
const char cDirSep = '/'; /* Use / as directory separator */
const char cQuote = '\''; /* Use '...' quoting on unix */
const char cEscape = '\\'; /* Use \X escaping on unix */
#endif
for(i=0; (c = zIn[i])!=0; i++){
if( c==cQuote || (unsigned char)c<' ' ||
c==cEscape || c==';' || c=='*' || c=='?' || c=='[' ){
Blob bad;
blob_token(pBlob, &bad);
fossil_fatal("the [%s] argument to the \"%s\" command contains "
"a character (ascii 0x%02x) that is a security risk",
zIn, blob_str(&bad), c);
}
if( !needEscape && !fossil_isalnum(c) && c!=cDirSep && c!='.' && c!='_' ){
needEscape = 1;
}
}
if( n>0 && !fossil_isspace(z[n-1]) ){
blob_append_char(pBlob, ' ');
}
if( needEscape ) blob_append_char(pBlob, cQuote);
if( zIn[0]=='-' ){
blob_append_char(pBlob, '.');
blob_append_char(pBlob, cDirSep);
#if defined(_WIN32)
}else if( zIn[0]=='/' ){
blob_append_char(pBlob, '.');
#endif
}
#if defined(_WIN32)
if( needEscape ){
for(i=0; (c = zIn[i])!=0; i++){
if( c==cQuote ) blob_append_char(pBlob, cDirSep);
blob_append_char(pBlob, c);
}
}else{
blob_append(pBlob, zIn, -1);
}
#else
blob_append(pBlob, zIn, -1);
#endif
if( needEscape ){
#if defined(_WIN32)
/* NOTE: Trailing backslash must be doubled before final double quote. */
if( pBlob->aData[pBlob->nUsed-1]==cDirSep ){
blob_append_char(pBlob, cDirSep);
}
#endif
blob_append_char(pBlob, cQuote);
}
}
/*
** COMMAND: test-escaped-arg
**
** Usage %fossil ARG ...
**
** Run each argment through blob_append_escaped_arg() and show the
** result. Append each argument to "fossil test-echo" and run that
** using fossil_system() to verify that it really does get escaped
** correctly.
*/
void test_escaped_arg__cmd(void){
int i;
Blob x;
blob_init(&x, 0, 0);
for(i=2; i<g.argc; i++){
fossil_print("%3d [%s]: ", i, g.argv[i]);
blob_appendf(&x, "fossil test-echo %$", g.argv[i]);
fossil_print("%s\n", blob_str(&x));
fossil_system(blob_str(&x));
blob_reset(&x);
}
}
/*
** A read(2)-like impl for the Blob class. Reads (copies) up to nLen
** bytes from pIn, starting at position pIn->iCursor, and copies them
** to pDest (which must be valid memory at least nLen bytes long).
**
** Returns the number of bytes read/copied, which may be less than
** nLen (if end-of-blob is encountered).
**
** Updates pIn's cursor.
**
** Returns 0 if pIn contains no data.
*/
unsigned int blob_read(Blob *pIn, void * pDest, unsigned int nLen ){
if( !pIn->aData || (pIn->iCursor >= pIn->nUsed) ){
return 0;
} else if( (pIn->iCursor + nLen) > (unsigned int)pIn->nUsed ){
nLen = (unsigned int) (pIn->nUsed - pIn->iCursor);
}
assert( pIn->nUsed > pIn->iCursor );
assert( (pIn->iCursor+nLen) <= pIn->nUsed );
if( nLen ){
memcpy( pDest, pIn->aData, nLen );
pIn->iCursor += nLen;
}
return nLen;
}
/*
** Swaps the contents of the given blobs. Results
** are unspecified if either value is NULL or both
** point to the same blob.
*/
void blob_swap( Blob *pLeft, Blob *pRight ){
Blob swap = *pLeft;
*pLeft = *pRight;
*pRight = swap;
}
/*
** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
** done. If useMbcs is false and there is no BOM, the input string is assumed
** to be UTF-8 already, so no conversion is done.
*/
void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
char *zUtf8;
int bomSize = 0;
int bomReverse = 0;
if( starts_with_utf8_bom(pBlob, &bomSize) ){
struct Blob temp;
zUtf8 = blob_str(pBlob) + bomSize;
blob_zero(&temp);
blob_append(&temp, zUtf8, -1);
blob_swap(pBlob, &temp);
blob_reset(&temp);
}else if( starts_with_utf16_bom(pBlob, &bomSize, &bomReverse) ){
zUtf8 = blob_buffer(pBlob);
if( bomReverse ){
/* Found BOM, but with reversed bytes */
unsigned int i = blob_size(pBlob);
while( i>1 ){
/* swap bytes of unicode representation */
char zTemp = zUtf8[--i];
zUtf8[i] = zUtf8[i-1];
zUtf8[--i] = zTemp;
}
}
/* Make sure the blob contains two terminating 0-bytes */
blob_append(pBlob, "\000\000", 3);
zUtf8 = blob_str(pBlob) + bomSize;
zUtf8 = fossil_unicode_to_utf8(zUtf8);
blob_reset(pBlob);
blob_set_dynamic(pBlob, zUtf8);
}else if( useMbcs && invalid_utf8(pBlob) ){
#if defined(_WIN32) || defined(__CYGWIN__)
zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
blob_reset(pBlob);
blob_append(pBlob, zUtf8, -1);
fossil_mbcs_free(zUtf8);
#else
blob_cp1252_to_utf8(pBlob);
#endif /* _WIN32 */
}
}