/*
** Copyright (c) 2014 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the Simplified BSD License (also
** known as the "2-Clause License" or "FreeBSD License".)
**
** This program is distributed in the hope that it will be useful,
** but without any warranty; without even the implied warranty of
** merchantability or fitness for a particular purpose.
**
** Author contact information:
** drh@hwaci.com
** http://www.hwaci.com/drh/
**
*******************************************************************************
**
** This program reads Fossil source code files and tries to verify that
** printf-style format strings are correct.
**
** This program implements a compile-time validation step on the Fossil
** source code. Running this program is entirely optional. Its role is
** similar to the -Wall compiler switch on gcc, or the scan-build utility
** of clang, or other static analyzers. The purpose is to try to identify
** problems in the source code at compile-time. The difference is that this
** static checker is specifically designed for the particular printf formatter
** implementation used by Fossil.
**
** Checks include:
**
** * Verify that vararg formatting routines like blob_printf() or
** db_multi_exec() have the correct number of arguments for their
** format string.
**
** * For routines designed to generate SQL or HTML or a URL or JSON,
** detect and warn about possible injection attacks.
*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
/*
** Debugging switch.  main() increments this once for every "-v" argument.
** A value above 0 prints the name of each file before it is scanned; a
** value above 1 also prints a line for every checked call that had no
** errors.
*/
static int eVerbose = 0;
/*
** Allocate nByte bytes with malloc().  On failure, print a message on
** stderr and terminate the process with exit code 1, so the caller never
** sees a NULL result.
*/
void *safe_malloc(int nByte){
  void *pNew = malloc(nByte);
  if( pNew!=0 ) return pNew;
  fprintf(stderr, "failed to allocate %d bytes\n", nByte);
  exit(1);
}
/*
** Resize the allocation pOld to nByte bytes with realloc().  On failure,
** print a message on stderr and terminate the process with exit code 1.
*/
void *safe_realloc(void *pOld, int nByte){
  void *pNew = realloc(pOld, nByte);
  if( pNew!=0 ) return pNew;
  fprintf(stderr, "failed to allocate %d bytes\n", nByte);
  exit(1);
}
/*
** Read the entire content of the file named zFilename into memory obtained
** from malloc().  Add a zero-terminator to the end.
** Return a pointer to that memory.  The caller is responsible for passing
** the result to free().
**
** Return NULL if the file cannot be opened, if its size cannot be
** determined, or if it is too large to be indexed with an "int" (the
** tokenizer in this file uses int offsets throughout).
*/
static char *read_file(const char *zFilename){
  FILE *in;
  char *z;
  long nByte;
  size_t got;

  in = fopen(zFilename, "rb");
  if( in==0 ){
    return 0;
  }
  /* Seeking can fail (pipes, directories, ...), in which case ftell()
  ** reports -1 or a meaningless size.  Bail out instead of sizing the
  ** buffer from garbage. */
  if( fseek(in, 0, SEEK_END)!=0
   || (nByte = ftell(in))<0
   || nByte>=INT_MAX
   || fseek(in, 0, SEEK_SET)!=0
  ){
    fclose(in);
    return 0;
  }
  z = safe_malloc( (int)nByte+1 );
  /* A short read is tolerated: terminate after whatever was obtained. */
  got = fread(z, 1, (size_t)nByte, in);
  z[got] = 0;
  fclose(in);
  return z;
}
/*
** When parsing the input file, the following token types are recognized.
*/
#define TK_SPACE      1      /* Whitespace or comments */
#define TK_ID         2      /* An identifier */
#define TK_STR        3      /* A string literal in double-quotes */
#define TK_OTHER      4      /* Any other token */
#define TK_EOF       99      /* End of file */

/*
** Determine the length and type of the token beginning at z[0].
**
** The token type (one of the TK_ values) is written into *pType and the
** length of the token in bytes is returned.  Zero is returned only for
** TK_EOF.  *pLN is incremented once for every newline character that the
** token contains, so that the caller can keep a running line count.
**
** Token rules:
**
**   *  A double-quoted literal is TK_STR; a single-quoted literal is
**      TK_OTHER.  A backslash escapes the character that follows it.
**      An unterminated literal extends to the end of the input.
**   *  A run of alphanumerics and '_' is TK_ID if it starts with a letter
**      or '_', otherwise (a number) TK_OTHER.
**   *  Whitespace, both comment styles, and the backslash of a line
**      continuation are TK_SPACE.
**   *  Any other single byte is TK_OTHER.
**
** Bytes are cast to unsigned char before being handed to the <ctype.h>
** classifiers, because passing a negative char value (for example a byte
** of a UTF-8 sequence) to them is undefined behavior.
*/
static int token_length(const char *z, int *pType, int *pLN){
  int i;
  if( z[0]==0 ){
    *pType = TK_EOF;
    return 0;
  }
  if( z[0]=='"' || z[0]=='\'' ){
    for(i=1; z[i] && z[i]!=z[0]; i++){
      if( z[i]=='\\' && z[i+1]!=0 ){
        i++;   /* Skip the escaped character, which may be the quote */
      }
      /* Count every newline inside the literal, escaped or not, so that
      ** a backslash-CR-LF continuation or a stray apostrophe does not
      ** throw off the line numbers reported later. */
      if( z[i]=='\n' ) (*pLN)++;
    }
    if( z[i]!=0 ) i++;   /* Include the closing quote */
    *pType = z[0]=='"' ? TK_STR : TK_OTHER;
    return i;
  }
  if( isalnum((unsigned char)z[0]) || z[0]=='_' ){
    for(i=1; isalnum((unsigned char)z[i]) || z[i]=='_'; i++){}
    *pType = isalpha((unsigned char)z[0]) || z[0]=='_' ? TK_ID : TK_OTHER;
    return i;
  }
  if( isspace((unsigned char)z[0]) ){
    if( z[0]=='\n' ) (*pLN)++;
    for(i=1; isspace((unsigned char)z[i]); i++){
      if( z[i]=='\n' ) (*pLN)++;
    }
    *pType = TK_SPACE;
    return i;
  }
  if( z[0]=='/' && z[1]=='*' ){
    for(i=2; z[i] && (z[i]!='*' || z[i+1]!='/'); i++){
      if( z[i]=='\n' ) (*pLN)++;
    }
    if( z[i] ) i += 2;   /* Include the comment terminator */
    *pType = TK_SPACE;
    return i;
  }
  if( z[0]=='/' && z[1]=='/' ){
    for(i=2; z[i] && z[i]!='\n'; i++){}
    if( z[i] ){
      (*pLN)++;
      i++;
    }
    *pType = TK_SPACE;
    return i;
  }
  if( z[0]=='\\' && (z[1]=='\n' || (z[1]=='\r' && z[2]=='\n')) ){
    /* Line continuation: only the backslash is consumed here.  The
    ** newline that follows is picked up as ordinary whitespace. */
    *pType = TK_SPACE;
    return 1;
  }
  *pType = TK_OTHER;
  return 1;
}
/*
** Skip over any TK_SPACE tokens (whitespace and comments) at the start
** of z and return a pointer to the first token that is something else.
** The length and type of that token are written into *pLen and *pType.
** At end of input the returned pointer addresses the terminator, *pLen
** is 0 and *pType is TK_EOF.
*/
const char *next_non_whitespace(const char *z, int *pLen, int *pType){
  int nTok;            /* Size of the current token */
  int tokType;         /* Type of the current token */
  int lnUnused = 0;    /* Line counter required by token_length() */
  for(;;){
    nTok = token_length(z, &tokType, &lnUnused);
    if( nTok<=0 || tokType!=TK_SPACE ) break;
    z += nTok;
  }
  *pLen = nTok;
  *pType = tokType;
  return z;
}
/*
** Return the index into z[] of the first TK_OTHER token whose first
** character is cVal and which is not nested inside parentheses.  If no
** such token exists, the return value is the offset at which scanning
** stopped (normally the end of the string).
*/
static int distance_to(const char *z, char cVal){
  const char *zCur = z;   /* Current scan position */
  int depth = 0;          /* Parenthesis nesting level */
  int tokType;            /* Type of the token at zCur */
  int nTok;               /* Size of the token at zCur */
  int lnUnused = 0;       /* Line counter required by token_length() */
  while( zCur[0] ){
    nTok = token_length(zCur, &tokType, &lnUnused);
    if( nTok<=0 ) break;
    if( tokType==TK_OTHER ){
      char c = zCur[0];
      if( c==cVal && depth==0 ) break;
      if( c=='(' ){
        depth++;
      }else if( c==')' ){
        depth--;
      }
    }
    zCur += nTok;
  }
  return (int)(zCur - z);
}
/*
** Return a pointer to the first non-whitespace character in z[].  If z
** is empty or entirely whitespace, the result points at its terminator.
**
** The byte is cast to unsigned char before classification because
** isspace() is undefined for negative char values.
*/
static const char *skip_space(const char *z){
  while( isspace((unsigned char)z[0]) ){ z++; }
  return z;
}
/*
** Remove excess whitespace and nested "()" from string z.
**
** Leading and trailing whitespace is dropped, and a '(' at the start
** paired with a ')' at the end is peeled off, repeatedly.  The string is
** modified in place (a new terminator is written) and the return value
** points into the original buffer.
**
** Trailing whitespace must be removed along with leading whitespace;
** otherwise text such as "(x) " keeps its parentheses and "NULL " is not
** recognized as NULL by later exact comparisons.
**
** NOTE(review): the first '(' and last ')' are assumed to be a matching
** pair.  Input like "(a)+(b)" therefore loses its outermost characters.
*/
static char *simplify_expr(char *z){
  int n = (int)strlen(z);
  while( n>0 ){
    if( isspace((unsigned char)z[0]) ){
      z++;
      n--;
      continue;
    }
    if( isspace((unsigned char)z[n-1]) ){
      n--;
      continue;
    }
    if( z[0]=='(' && z[n-1]==')' ){
      /* Both tests can only hold when n>=2, so n never goes negative */
      z++;
      n -= 2;
      continue;
    }
    break;
  }
  z[n] = 0;
  return z;
}
/*
** Return true if the input, after skipping leading whitespace and
** comments, either begins with a double-quote or is exactly the text
** "NULL".
*/
static int is_string_lit(const char *z){
  int nUnused, eUnused;
  const char *zTok = next_non_whitespace(z, &nUnused, &eUnused);
  return strcmp(zTok, "NULL")==0 || zTok[0]=='"';
}
/*
** Return true if the input is an expression of string literals:
**
**      EXPR ? "..." : "..."
**
** The whole expression may also be wrapped in a single pair of
** parentheses.  Nothing but whitespace or comments may follow the
** second literal (or the closing parenthesis).
*/
static int is_string_expr(const char *z){
  const char *zFirst = skip_space(z);  /* First non-space char of input */
  int hasParen = zFirst[0]=='(';       /* True if input starts with '(' */
  int nTok = 0;                        /* Size of the current token */
  int tokType;                         /* Type of the current token */
  int n;                               /* Offset of the '?' */

  /* Locate the top-level '?'.  If there is none and the input begins
  ** with '(', look again inside the parentheses. */
  n = distance_to(z, '?');
  if( z[n]==0 && hasParen ){
    z = zFirst + 1;
    n = distance_to(z, '?');
  }
  z += n;
  if( z[0]!='?' ) return 0;

  /* "..." */
  z = next_non_whitespace(z+1, &nTok, &tokType);
  if( tokType!=TK_STR ) return 0;

  /* : */
  z = next_non_whitespace(z+nTok, &nTok, &tokType);
  if( tokType!=TK_OTHER || z[0]!=':' ) return 0;

  /* "..." */
  z = next_non_whitespace(z+nTok, &nTok, &tokType);
  if( tokType!=TK_STR ) return 0;

  /* End of input, or ")" followed by end of input */
  z = next_non_whitespace(z+nTok, &nTok, &tokType);
  if( tokType==TK_EOF ) return 1;
  if( tokType!=TK_OTHER || z[0]!=')' || !hasParen ) return 0;
  next_non_whitespace(z+nTok, &nTok, &tokType);
  return tokType==TK_EOF;
}
/*
** A list of functions that return strings that are safe to insert into
** SQL using %s.
**
** is_sql_safe() accepts an argument whose first token is an identifier
** that exactly matches one of these names.
*/
static const char *azSafeFunc[] = {
"filename_collation",
"leaf_is_closed_sql",
"timeline_query_for_www",
"timeline_query_for_tty",
"blob_sql_text",
"glob_expr",
"fossil_all_reserved_names",
"configure_inop_rhs",
"db_setting_inop_rhs",
};
/*
** Return true if the input is an argument that is safe to use with %s
** while building an SQL statement.  An argument is accepted when any of
** the following holds:
**
**   *  It is a string literal (or NULL).
**   *  Its first token is the name of a function listed in azSafeFunc[].
**   *  It has the form:  EXPR ? "..." : "..."
**   *  It contains the "safe-for-%s" marker comment.
*/
static int is_sql_safe(const char *z){
  int nTok;        /* Size of the first token */
  int tokType;     /* Type of the first token */

  if( is_string_lit(z) ) return 1;

  z = next_non_whitespace(z, &nTok, &tokType);
  if( tokType==TK_ID ){
    const int nSafe = (int)(sizeof(azSafeFunc)/sizeof(azSafeFunc[0]));
    int j;
    for(j=0; j<nSafe; j++){
      const char *zName = azSafeFunc[j];
      if( (int)strlen(zName)==nTok && strncmp(z, zName, nTok)==0 ){
        return 1;
      }
    }
  }

  return is_string_expr(z) || strstr(z, "/*safe-for-%s*/")!=0;
}
/*
** Return true if the input is an argument that is never safe for use
** with %s.  That is the case for text that begins with "cgi_param", and
** for text that begins with 'P' other than "PIF(" and "PCK(".  The
** "safe-for-%s" marker comment anywhere in the text overrides the
** verdict and makes the function return false.
*/
static int never_safe(const char *z){
  if( strstr(z,"/*safe-for-%s*/")!=0 ) return 0;
  if( z[0]!='P' ){
    return strncmp(z,"cgi_param",9)==0;
  }
  return strncmp(z,"PIF(",4)!=0 && strncmp(z,"PCK(",4)!=0;
}
/*
** Processing flags
**
** As applied by checkFormatFunc():
**
**   FMT_LIT    Only require that the format argument be a string literal.
**   FMT_PX     Require a literal alphabetic prefix on the format string;
**              the per-argument safety tests are not run.
**   FMT_SAFE   The per-argument safety tests are not run.
**   FMT_SQL    Arguments for %s, %z and %b must pass both never_safe()
**              and is_sql_safe().
**   FMT_HTML, FMT_URL, FMT_JSON
**              Arguments for %s, %z and %b must pass never_safe().
*/
#define FMT_SQL 0x00001 /* Generator for SQL text */
#define FMT_HTML 0x00002 /* Generator for HTML text */
#define FMT_URL 0x00004 /* Generator for URLs */
#define FMT_JSON 0x00008 /* Generator for JSON */
#define FMT_SAFE 0x00010 /* Generator for human-readable text */
#define FMT_LIT 0x00020 /* Just verify that a string literal */
#define FMT_PX 0x00040 /* Must have a literal prefix in format string */
/*
** A list of internal Fossil interfaces that take a printf-style format
** string.
**
** isFormatFunc() looks names up in this table with a binary search.
** main() sorts the table with qsort() before any lookup, so the order of
** the entries here does not have to be exact.
*/
struct FmtFunc {
const char *zFName; /* Name of the function */
int iFmtArg; /* Index of format argument. Leftmost is 1. */
unsigned fmtFlags; /* Processing flags */
} aFmtFunc[] = {
{ "admin_log", 1, FMT_SAFE },
{ "ajax_route_error", 2, FMT_SAFE },
{ "audit_append", 3, FMT_SAFE },
{ "backofficeTrace", 1, FMT_SAFE },
{ "backoffice_log", 1, FMT_SAFE },
{ "blob_append_sql", 2, FMT_SQL },
{ "blob_appendf", 2, FMT_SAFE },
{ "cgi_debug", 1, FMT_SAFE },
{ "cgi_panic", 1, FMT_SAFE },
{ "cgi_printf", 1, FMT_HTML },
{ "cgi_printf_header", 1, FMT_HTML },
{ "cgi_redirectf", 1, FMT_URL },
{ "chref", 2, FMT_URL },
{ "CX", 1, FMT_HTML },
{ "db_blob", 2, FMT_SQL },
{ "db_debug", 1, FMT_SQL },
{ "db_double", 2, FMT_SQL },
{ "db_err", 1, FMT_SAFE },
{ "db_exists", 1, FMT_SQL },
{ "db_get_mprintf", 2, FMT_SAFE },
{ "db_int", 2, FMT_SQL },
{ "db_int64", 2, FMT_SQL },
{ "db_lset", 1, FMT_LIT },
{ "db_lset_int", 1, FMT_LIT },
{ "db_multi_exec", 1, FMT_SQL },
{ "db_optional_sql", 2, FMT_SQL },
{ "db_prepare", 2, FMT_SQL },
{ "db_prepare_ignore_error", 2, FMT_SQL },
{ "db_set", 1, FMT_LIT },
{ "db_set_int", 1, FMT_LIT },
{ "db_set_mprintf", 3, FMT_PX },
{ "db_static_prepare", 2, FMT_SQL },
{ "db_text", 2, FMT_SQL },
{ "db_unset", 1, FMT_LIT },
{ "db_unset_mprintf", 2, FMT_PX },
{ "emailerError", 2, FMT_SAFE },
{ "entry_attribute", 4, FMT_LIT },
{ "fileedit_ajax_error", 2, FMT_SAFE },
{ "form_begin", 2, FMT_URL },
{ "fossil_error", 2, FMT_SAFE },
{ "fossil_errorlog", 1, FMT_SAFE },
{ "fossil_fatal", 1, FMT_SAFE },
{ "fossil_fatal_recursive", 1, FMT_SAFE },
{ "fossil_panic", 1, FMT_SAFE },
{ "fossil_print", 1, FMT_SAFE },
{ "fossil_trace", 1, FMT_SAFE },
{ "fossil_warning", 1, FMT_SAFE },
{ "gitmirror_message", 2, FMT_SAFE },
{ "href", 1, FMT_URL },
{ "json_new_string_f", 1, FMT_SAFE },
{ "json_set_err", 2, FMT_SAFE },
{ "json_warn", 2, FMT_SAFE },
{ "mprintf", 1, FMT_SAFE },
{ "multiple_choice_attribute", 3, FMT_LIT },
{ "onoff_attribute", 3, FMT_LIT },
{ "pop3_print", 2, FMT_SAFE },
{ "smtp_send_line", 2, FMT_SAFE },
{ "smtp_server_send", 2, FMT_SAFE },
{ "socket_set_errmsg", 1, FMT_SAFE },
{ "ssl_set_errmsg", 1, FMT_SAFE },
{ "style_copy_button", 5, FMT_SAFE },
{ "style_header", 1, FMT_HTML },
{ "style_set_current_page", 1, FMT_URL },
{ "style_submenu_element", 2, FMT_URL },
{ "style_submenu_sql", 3, FMT_SQL },
{ "textarea_attribute", 5, FMT_LIT },
{ "tktsetup_generic", 1, FMT_LIT },
{ "webpage_error", 1, FMT_SAFE },
{ "webpage_notfound_error", 1, FMT_SAFE },
{ "xfersetup_generic", 1, FMT_LIT },
{ "xhref", 2, FMT_URL },
};
/*
** qsort() comparison callback: order two FmtFunc entries by function
** name using strcmp().
*/
static int fmtfunc_cmp(const void *pAA, const void *pBB){
  const char *zLeft = ((const struct FmtFunc*)pAA)->zFName;
  const char *zRight = ((const struct FmtFunc*)pBB)->zFName;
  return strcmp(zLeft, zRight);
}
/*
** Determine if the indentifier zIdent of length nIndent is a Fossil
** internal interface that uses a printf-style argument. Return zero if not.
** Return the index of the format string if true with the left-most
** argument having an index of 1.
*/
static int isFormatFunc(const char *zIdent, int nIdent, unsigned *pFlags){
int upr, lwr;
lwr = 0;
upr = sizeof(aFmtFunc)/sizeof(aFmtFunc[0]) - 1;
while( lwr<=upr ){
unsigned x = (lwr + upr)/2;
int c = strncmp(zIdent, aFmtFunc[x].zFName, nIdent);
if( c==0 ){
if( aFmtFunc[x].zFName[nIdent]==0 ){
*pFlags = aFmtFunc[x].fmtFlags;
return aFmtFunc[x].iFmtArg;
}
c = -1;
}
if( c<0 ){
upr = x - 1;
}else{
lwr = x + 1;
}
}
*pFlags = 0;
return 0;
}
/*
** Return the expected number of arguments for the format string.
** Return -1 if the value cannot be computed.
**
** For each argument less than nType, store the conversion character
** for that argument in cType[i].
**
** Store the number of initial literal characters of the format string
** in *pInit.
*/
static int formatArgCount(const char *z, int nType, char *cType, int *pInit){
int nArg = 0;
int i, k;
int len;
int eType;
int ln = 0;
*pInit = 0;
while( z[0] ){
len = token_length(z, &eType, &ln);
if( eType==TK_STR ){
for(i=1; i<len-1 && isalpha(z[i]); i++){}
*pInit = i-1;
for(i=1; i<len-1; i++){
if( z[i]!='%' ) continue;
if( z[i+1]=='%' ){ i++; continue; }
for(k=i+1; k<len && !isalpha(z[k]); k++){
if( z[k]=='*' || z[k]=='#' ){
if( nArg<nType ) cType[nArg] = z[k];
nArg++;
}
}
if( z[k]!='R' ){
if( nArg<nType ) cType[nArg] = z[k];
nArg++;
}
}
}
z += len;
}
return nArg;
}
/*
** The function call that begins at zFCall[0] (which is on line lnFCall of the
** original file) is a function that uses a printf-style format string
** on argument number fmtArg. It has processing flags fmtFlags. Do
** compile-time checking on this function, output any errors, and return
** the number of errors.
**
** The checks form an if/else-if chain, so a later check runs only when
** the earlier ones found nothing to report:
**
**   (1) The call must have at least fmtArg arguments.
**   (2) The format argument must be a string literal (or NULL).  For
**       FMT_LIT functions this is the last check.
**   (3) The number of arguments must equal fmtArg plus the number of
**       values consumed by the format string.
**   (4) For FMT_PX functions, the format string must begin with at least
**       one alphabetic character.  Nothing further is checked.
**   (5) Otherwise, unless FMT_SAFE is set, each argument consumed by a
**       %s, %z or %b conversion must not be rejected by never_safe() and,
**       for FMT_SQL functions, must be accepted by is_sql_safe().
**
** If the format argument contains a comment beginning with "works-like:"
** then the text after that marker is used as the format string in place
** of the argument itself.
**
** Errors are written to standard output.  When there is at least one
** error, all arguments of the call are listed afterwards.
*/
static int checkFormatFunc(
const char *zFilename, /* Name of the file being processed */
const char *zFCall, /* Pointer to start of function call */
int lnFCall, /* Line number that holds z[0] */
int fmtArg, /* Format string should be this argument */
int fmtFlags /* Extra processing flags */
){
int szFName;
int eToken;
int ln = lnFCall;
int len;
const char *zStart;
char *z;
char *zCopy;
int nArg = 0;
const char **azArg = 0;
int i, k;
int nErr = 0;
char *acType;
int nInit = 0;
/* Measure the function name, then locate the "(" that follows it.  The
** caller only invokes this routine when that "(" is present. */
szFName = token_length(zFCall, &eToken, &ln);
zStart = next_non_whitespace(zFCall+szFName, &len, &eToken);
assert( zStart[0]=='(' && len==1 );
/* Make a private, writable copy of the text between the parentheses so
** that it can be split into arguments in place. */
len = distance_to(zStart+1, ')');
zCopy = safe_malloc( len + 1 );
memcpy(zCopy, zStart+1, len);
zCopy[len] = 0;
azArg = 0;
nArg = 0;
z = zCopy;
/* Split on top-level commas.  Each argument is terminated by overwriting
** its comma, then simplified, and a pointer to it is stored in azArg[]. */
while( z[0] ){
char cEnd;
len = distance_to(z, ',');
cEnd = z[len];
z[len] = 0;
/* One pointer plus one extra byte is reserved per argument.  The extra
** bytes, which end up just past the pointers, serve as acType[] below. */
azArg = safe_realloc((char*)azArg, (sizeof(azArg[0])+1)*(nArg+1));
azArg[nArg++] = simplify_expr(z);
if( cEnd==0 ) break;
z += len + 1;
}
/* acType[i] will hold the conversion character of the i-th value that
** the format string consumes.  It is filled in by formatArgCount(). */
acType = (char*)&azArg[nArg];
if( fmtArg>nArg ){
printf("%s:%d: too few arguments to %.*s()\n",
zFilename, lnFCall, szFName, zFCall);
nErr++;
}else{
const char *zFmt = azArg[fmtArg-1];
/* A "works-like:" comment supplies the format text to be checked */
const char *zOverride = strstr(zFmt, "/*works-like:");
if( zOverride ) zFmt = zOverride + sizeof("/*works-like:")-1;
if( fmtFlags & FMT_LIT ){
if( !is_string_lit(zFmt) ){
printf("%s:%d: argument %d to %.*s() should be a string literal\n",
zFilename, lnFCall, fmtArg, szFName, zFCall);
nErr++;
}
}else if( !is_string_lit(zFmt) ){
printf("%s:%d: %.*s() has non-constant format on arg[%d]\n",
zFilename, lnFCall, szFName, zFCall, fmtArg-1);
nErr++;
/* The next condition also assigns k and nInit, which the branches
** further down the chain rely on. */
}else if( (k = formatArgCount(zFmt, nArg, acType, &nInit))>=0
&& nArg!=fmtArg+k ){
printf("%s:%d: too %s arguments to %.*s() "
"- got %d and expected %d\n",
zFilename, lnFCall, (nArg<fmtArg+k ? "few" : "many"),
szFName, zFCall, nArg, fmtArg+k);
nErr++;
}else if( (fmtFlags & FMT_PX)!=0 ){
if( nInit==0 ){
printf("%s:%d: format string on %.*s() should have"
" an ASCII character prefix\n",
zFilename, lnFCall, szFName, zFCall);
nErr++;
}
}else if( (fmtFlags & FMT_SAFE)==0 ){
/* Here nArg==fmtArg+k, so azArg[fmtArg+i] is in range for every i<k */
for(i=0; i<nArg && i<k; i++){
if( (acType[i]=='s' || acType[i]=='z' || acType[i]=='b') ){
const char *zExpr = azArg[fmtArg+i];
if( never_safe(zExpr) ){
printf("%s:%d: Argument %d to %.*s() is not safe for"
" a query parameter\n",
zFilename, lnFCall, i+fmtArg, szFName, zFCall);
nErr++;
}else if( (fmtFlags & FMT_SQL)!=0 && !is_sql_safe(zExpr) ){
printf("%s:%d: Argument %d to %.*s() not safe for SQL\n",
zFilename, lnFCall, i+fmtArg, szFName, zFCall);
nErr++;
}
}
}
}
}
if( nErr ){
/* Show how the call was split, to help interpret the messages above */
for(i=0; i<nArg; i++){
printf(" arg[%d]: %s\n", i, azArg[i]);
}
}else if( eVerbose>1 ){
printf("%s:%d: %.*s() ok for %d arguments\n",
zFilename, lnFCall, szFName, zFCall, nArg);
}
/* The azArg[] entries point into zCopy, so only the two buffers are freed */
free((char*)azArg);
free(zCopy);
return nErr;
}
/*
** Do a design-rule check of format strings for the file named zName
** with content zContent.  Write errors on standard output.  Return
** the number of errors.
**
** A NULL zContent means the file could not be read; that is reported
** and counted as one error.
**
** The text is tokenized from start to finish.  Whenever a "(" inside at
** least one level of curly braces directly follows an identifier that
** isFormatFunc() recognizes, the call is handed to checkFormatFunc().
** The line counter starts at 0 and is advanced by token_length().
*/
static int scan_file(const char *zName, const char *zContent){
  const char *zCur;          /* Current position in zContent */
  const char *zLast = 0;     /* Most recent non-space token */
  int eLast = 0;             /* Type of zLast */
  int nLast = 0;             /* Size of zLast */
  int lnLast = 0;            /* Line counter value recorded for zLast */
  int lineNo = 0;            /* Running line counter */
  int depth = 0;             /* Curly-brace nesting level */
  int eTok;                  /* Type of the current token */
  unsigned flags = 0;        /* Flags reported by isFormatFunc() */
  int nErr = 0;              /* Errors found so far */

  if( zContent==0 ){
    printf("cannot read file: %s\n", zName);
    return 1;
  }
  zCur = zContent;
  while( zCur[0] ){
    int nTok = token_length(zCur, &eTok, &lineNo);
    if( eTok!=TK_SPACE ){
      if( eTok==TK_OTHER ){
        switch( zCur[0] ){
          case '{':
            depth++;
            break;
          case '}':
            depth--;
            break;
          case '(':
            if( depth>0 && eLast==TK_ID ){
              int iFmt = isFormatFunc(zLast, nLast, &flags);
              if( iFmt>0 ){
                nErr += checkFormatFunc(zName, zLast, lnLast, iFmt, flags);
              }
            }
            break;
          default:
            break;
        }
      }
      zLast = zCur;
      eLast = eTok;
      nLast = nTok;
      lnLast = lineNo;
    }
    zCur += nTok;
  }
  return nErr;
}
/*
** Check for format-string design rule violations on all files listed
** on the command-line.
**
** The eVerbose global variable is incremented with each "-v" argument.
** A "-v" only affects the files that come after it on the command line.
**
** The exit status is the number of errors found, capped at 255.  Without
** the cap, an error count that is a multiple of 256 would be reported as
** success on systems where only the low 8 bits of the status survive.
*/
int main(int argc, char **argv){
  int i;
  int nErr = 0;
  /* isFormatFunc() does a binary search, so the table must be sorted */
  qsort(aFmtFunc, sizeof(aFmtFunc)/sizeof(aFmtFunc[0]),
        sizeof(aFmtFunc[0]), fmtfunc_cmp);
  for(i=1; i<argc; i++){
    char *zFile;
    if( strcmp(argv[i],"-v")==0 ){
      eVerbose++;
      continue;
    }
    if( eVerbose>0 ) printf("Processing %s...\n", argv[i]);
    zFile = read_file(argv[i]);
    /* scan_file() reports an unreadable file (zFile==NULL) itself */
    nErr += scan_file(argv[i], zFile);
    free(zFile);
  }
  return nErr>255 ? 255 : nErr;
}