/* ** Copyright (c) 2011 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the Simplified BSD License (also ** known as the "2-Clause License" or "FreeBSD License".) ** This program is distributed in the hope that it will be useful, ** but without any warranty; without even the implied warranty of ** merchantability or fitness for a particular purpose. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** This file contains code used to pattern matching using "glob" syntax. */ #include "config.h" #include "glob.h" #include <assert.h> /* ** Construct and return a string which is an SQL expression that will ** be TRUE if value zVal matches any of the GLOB expressions in the list ** zGlobList. For example: ** ** zVal: "x" ** zGlobList: "*.o,*.obj" ** ** Result: "(x GLOB '*.o' OR x GLOB '*.obj')" ** ** Commas and whitespace are considered to be element delimters. Each ** element of the GLOB list may optionally be enclosed in either '...' or ** "...". This allows commas and/or whitespace to be used in the elements ** themselves. ** ** The returned string is owned by the caller, who must fossil_free() ** it. */ char *glob_expr(const char *zVal, const char *zGlobList){ Blob expr; const char *zSep = "("; int nTerm = 0; int i; int cTerm; if( zGlobList==0 || zGlobList[0]==0 ) return fossil_strdup("0"); blob_zero(&expr); while( zGlobList[0] ){ while( fossil_isspace(zGlobList[0]) || zGlobList[0]==',' ){ zGlobList++; /* Skip leading commas, spaces, and newlines */ } if( zGlobList[0]==0 ) break; if( zGlobList[0]=='\'' || zGlobList[0]=='"' ){ cTerm = zGlobList[0]; zGlobList++; }else{ cTerm = ','; } /* Find the next delimter (or the end of the string). */ for(i=0; zGlobList[i] && zGlobList[i]!=cTerm; i++){ if( cTerm!=',' ) continue; /* If quoted, keep going. */ if( fossil_isspace(zGlobList[i]) ) break; /* If space, stop. */ } blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList); zSep = " OR "; if( cTerm!=',' && zGlobList[i] ) i++; zGlobList += i; if( zGlobList[0] ) zGlobList++; nTerm++; } if( nTerm ){ blob_appendf(&expr, ")"); return blob_str(&expr); }else{ return fossil_strdup("0"); } } #if INTERFACE /* ** A Glob object holds a set of patterns read to be matched against ** a string. */ struct Glob { int nPattern; /* Number of patterns */ char **azPattern; /* Array of pointers to patterns */ }; #endif /* INTERFACE */ /* ** zPatternList is a comma- or whitespace-separated list of glob patterns. ** Parse that list and use it to create a new Glob object. ** ** Elements of the glob list may be optionally enclosed in single- or ** double-quotes. This allows commas and whitespace to be part of a ** glob pattern. ** ** Leading and trailing spaces on glob patterns are ignored unless quoted. ** ** An empty or null pattern list results in a null glob, which will ** match nothing. */ Glob *glob_create(const char *zPatternList){ int nList; /* Size of zPatternList in bytes */ int i; /* Loop counters */ Glob *p; /* The glob being created */ char *z; /* Copy of the pattern list */ char delimiter; /* '\'' or '\"' or 0 */ if( zPatternList==0 || zPatternList[0]==0 ) return 0; nList = strlen(zPatternList); p = fossil_malloc( sizeof(*p) + nList+1 ); memset(p, 0, sizeof(*p)); z = (char*)&p[1]; memcpy(z, zPatternList, nList+1); while( z[0] ){ while( fossil_isspace(z[0]) || z[0]==',' ){ z++; /* Skip leading commas, spaces, and newlines */ } if( z[0]==0 ) break; if( z[0]=='\'' || z[0]=='"' ){ delimiter = z[0]; z++; }else{ delimiter = ','; } p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)*sizeof(char*) ); p->azPattern[p->nPattern++] = z; /* Find the next delimiter (or the end of the string). */ for(i=0; z[i] && z[i]!=delimiter && !(delimiter==',' && fossil_isspace(z[i])); i++){ /* keep looking for the end of the glob pattern */ } if( z[i]==0 ) break; z[i] = 0; z += i+1; } return p; } /* ** Return TRUE if zString matches any of the GLOB patterns in the ** string zPatternList. ** ** This is a like calling glob_create(), glob_match(), and glob_free() ** in sequence, without the overhead of creating the reusable Glob object. ** Use this for one-time matches against a comma-separated GLOB list. */ int glob_multi_match(const char *zPatternList, const char *zString){ int i; /* Loop counters */ int n = 0; /* Pattern counter */ const char *z; /* Current GLOB pattern */ char delimiter; /* '\'' or '\"' or 0 */ int rc; /* Result of comparison */ char zPat[100]; /* Copy of just the current pattern */ if( zPatternList==0 ) return 0; z = zPatternList; while( z[0] ){ while( fossil_isspace(z[0]) || z[0]==',' ){ z++; /* Skip leading commas, spaces, and newlines */ } if( z[0]==0 ) break; if( z[0]=='\'' || z[0]=='"' ){ delimiter = z[0]; z++; }else{ delimiter = ','; } /* Find the next delimiter (or the end of the string). */ for(i=0; z[i] && z[i]!=delimiter && !(delimiter==',' && fossil_isspace(z[i])); i++){ /* keep looking for the end of the glob pattern */ } n++; if( i>sizeof(zPat)-1 ){ char *zMPat = fossil_strndup(z, i); rc = sqlite3_strglob(zMPat, zString); fossil_free(zMPat); }else{ memcpy(zPat, z, i); zPat[i] = 0; rc = sqlite3_strglob(zPat, zString); } if( rc==0 ) return n; if( z[i]==0 ) break; z += i+1; } return 0; } /* ** Return true (non-zero) if zString matches any of the patterns in ** the Glob. The value returned is actually a 1-based index of the pattern ** that matched. Return 0 if none of the patterns match zString. ** ** A NULL glob matches nothing. */ int glob_match(Glob *pGlob, const char *zString){ int i; if( pGlob==0 ) return 0; for(i=0; i<pGlob->nPattern; i++){ if( sqlite3_strglob(pGlob->azPattern[i], zString)==0 ) return i+1; } return 0; } /* ** Free all memory associated with the given Glob object */ void glob_free(Glob *pGlob){ if( pGlob ){ fossil_free(pGlob->azPattern); fossil_free(pGlob); } } /* ** Appends the given glob to the given buffer in the form of a ** JS/JSON-compatible array. It requires that pDest have been ** initialized. If pGlob is NULL or empty it emits [] (an empty ** array). */ void glob_render_json_to_blob(Glob *pGlob, Blob *pDest){ int i = 0; blob_append(pDest, "[", 1); for( ; pGlob && i < pGlob->nPattern; ++i ){ if(i){ blob_append(pDest, ",", 1); } blob_appendf(pDest, "%!j", pGlob->azPattern[i]); } blob_append(pDest, "]", 1); } /* ** Functionally equivalent to glob_render_json_to_blob() ** but outputs via cgi_print(). */ void glob_render_json_to_cgi(Glob *pGlob){ int i = 0; CX("["); for( ; pGlob && i < pGlob->nPattern; ++i ){ if(i){ CX(","); } CX("%!j", pGlob->azPattern[i]); } CX("]"); } /* ** COMMAND: test-glob ** ** Usage: %fossil test-glob PATTERN STRING... ** ** PATTERN is a comma- and whitespace-separated list of optionally ** quoted glob patterns. Show which of the STRINGs that follow match ** the PATTERN. ** ** If PATTERN begins with "@" the rest of the pattern is understood ** to be a setting name (such as binary-glob, crln-glob, or encoding-glob) ** and the value of that setting is used as the actually glob pattern. ** ** The output consists of two numbers and a STRING. The first number ** is the result of glob_match() and the second is the result of ** glob_multi_match(). */ void glob_test_cmd(void){ Glob *pGlob; int i; char *zPattern; if( g.argc<4 ) usage("PATTERN STRING ..."); zPattern = g.argv[2]; if( zPattern[0]=='@' ){ db_find_and_open_repository(OPEN_ANY_SCHEMA,0); zPattern = db_get(zPattern+1, 0); if( zPattern==0 ) fossil_fatal("no such setting: %s", g.argv[2]+1); fossil_print("GLOB pattern: %s\n", zPattern); } fossil_print("SQL expression: %s\n", glob_expr("x", zPattern)); pGlob = glob_create(zPattern); for(i=0; i<pGlob->nPattern; i++){ fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]); } for(i=3; i<g.argc; i++){ fossil_print("%d %d %s\n", glob_match(pGlob, g.argv[i]), glob_multi_match(zPattern, g.argv[i]), g.argv[i]); } glob_free(pGlob); }