Fossil

Check-in [a93b58cf]
Login

Check-in [a93b58cf]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the GLOB processing on settings like binary-glob so that any sequence of commas and/or whitespace serves as a delimiter. Delimiters inside of quotes are part the glob pattern and are ignored. Enhance the test-glob command to verify correct operation of the globber.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: a93b58cf83ce00d069fbbd99bc6aa6cc51e3fa65
User & Date: drh 2013-03-06 20:53:10
Context
2013-03-07
01:12
For the looks_like_utf*() functions, continue to examine blob content in order to fully set the output flags, even if it appears to be binary. Also, increase the strictness of starts_with_utf16_bom() and make it more accurate. ... (check-in: 13fac7f7 user: mistachkin tags: trunk)
2013-03-06
20:53
Enhance the GLOB processing on settings like binary-glob so that any sequence of commas and/or whitespace serves as a delimiter. Delimiters inside of quotes are part the glob pattern and are ignored. Enhance the test-glob command to verify correct operation of the globber. ... (check-in: a93b58cf user: drh tags: trunk)
20:50
Enhancements to the test-glob command for better testing of the setting globber. ... (Closed-Leaf check-in: ad44353d user: drh tags: globListRefactor)
20:46
Modify the commit_warning() function so the warning for possible binary content is higher priority than the other warnings. ... (check-in: 2d9ae4ee user: mistachkin tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch Hide diffs
Changes to src/glob.c.
27
28
29
30
31
32
33

34
35
36
37
38
39

40
41
42
43
44
45
46
47
48
49
50
51


52
53
54
55
56
57
58

59
60
61
62
63
64
65
66
67
68
** zGlobList.  For example:
**
**    zVal:       "x"
**    zGlobList:  "*.o,*.obj"
**
**    Result:     "(x GLOB '*.o' OR x GLOB '*.obj')"
**

** Each element of the GLOB list may optionally be enclosed in either '...'
** or "...".  This allows commas in the expression.  Whitespace at the
** beginning and end of each GLOB pattern is ignored, except when enclosed
** within '...' or "...".
**
** This routine makes no effort to free the memory space it uses.

*/
char *glob_expr(const char *zVal, const char *zGlobList){
  Blob expr;
  char *zSep = "(";
  int nTerm = 0;
  int i;
  int cTerm;

  if( zGlobList==0 || zGlobList[0]==0 ) return "0";
  blob_zero(&expr);
  while( zGlobList[0] ){
    while( fossil_isspace(zGlobList[0]) || zGlobList[0]==',' ) zGlobList++;


    if( zGlobList[0]==0 ) break;
    if( zGlobList[0]=='\'' || zGlobList[0]=='"' ){
      cTerm = zGlobList[0];
      zGlobList++;
    }else{
      cTerm = ',';
    }

    for(i=0; zGlobList[i] && zGlobList[i]!=cTerm && zGlobList[i]!='\n'; i++){}
    if( cTerm==',' ){
      while( i>0 && fossil_isspace(zGlobList[i-1]) ){ i--; }
    }
    blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList);
    zSep = " OR ";
    if( cTerm!=',' && zGlobList[i] ) i++;
    zGlobList += i;
    if( zGlobList[0] ) zGlobList++;
    nTerm++;






>
|
|
<
|

|
>











|
>
>







>
|
|
|







27
28
29
30
31
32
33
34
35
36

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
** zGlobList.  For example:
**
**    zVal:       "x"
**    zGlobList:  "*.o,*.obj"
**
**    Result:     "(x GLOB '*.o' OR x GLOB '*.obj')"
**
** Commas and whitespace are considered to be element delimters.  Each
** element of the GLOB list may optionally be enclosed in either '...' or
** "...".  This allows commas and/or whitespace to be used in the elements

** themselves.
**
** This routine makes no effort to free the memory space it uses, which
** currently consists of a blob object and its contents.
*/
char *glob_expr(const char *zVal, const char *zGlobList){
  Blob expr;
  char *zSep = "(";
  int nTerm = 0;
  int i;
  int cTerm;

  if( zGlobList==0 || zGlobList[0]==0 ) return "0";
  blob_zero(&expr);
  while( zGlobList[0] ){
    while( fossil_isspace(zGlobList[0]) || zGlobList[0]==',' ){
      zGlobList++;  /* Skip leading commas, spaces, and newlines */
    }
    if( zGlobList[0]==0 ) break;
    if( zGlobList[0]=='\'' || zGlobList[0]=='"' ){
      cTerm = zGlobList[0];
      zGlobList++;
    }else{
      cTerm = ',';
    }
    /* Find the next delimter (or the end of the string). */
    for(i=0; zGlobList[i] && zGlobList[i]!=cTerm; i++){
      if( cTerm!=',' ) continue; /* If quoted, keep going. */
      if( fossil_isspace(zGlobList[i]) ) break; /* If space, stop. */
    }
    blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList);
    zSep = " OR ";
    if( cTerm!=',' && zGlobList[i] ) i++;
    zGlobList += i;
    if( zGlobList[0] ) zGlobList++;
    nTerm++;
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

118
119
120
121
122
123
124
125
126

127
128
129
130
131
132
133
134
135
136
137
138
struct Glob {
  int nPattern;        /* Number of patterns */
  char **azPattern;    /* Array of pointers to patterns */
};
#endif /* INTERFACE */

/*
** zPatternList is a comma-separate list of glob patterns.  Parse up
** that list and use it to create a new Glob object.
**
** Elements of the glob list may be optionally enclosed in single our
** double-quotes.  This allows a comma to be part of a glob.
**
** Leading and trailing spaces on unquoted glob patterns are ignored.
**
** An empty or null pattern list results in a null glob, which will
** match nothing.
*/
Glob *glob_create(const char *zPatternList){
  int nList;         /* Size of zPatternList in bytes */
  int i, j;          /* Loop counters */
  Glob *p;           /* The glob being created */
  char *z;           /* Copy of the pattern list */
  char delimiter;    /* '\'' or '\"' or 0 */

  if( zPatternList==0 || zPatternList[0]==0 ) return 0;
  nList = strlen(zPatternList);
  p = fossil_malloc( sizeof(*p) + nList+1 );
  memset(p, 0, sizeof(*p));
  z = (char*)&p[1];
  memcpy(z, zPatternList, nList+1);
  while( z[0] ){
    while( z[0]==',' || z[0]==' ' || z[0]=='\n' || z[0]=='\r' ){
      z++;  /* Skip leading spaces and newlines */
    }

    if( z[0]=='\'' || z[0]=='"' ){
      delimiter = z[0];
      z++;
    }else{
      delimiter = ',';
    }
    if( z[0]==0 ) break;
    p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)*sizeof(char*) );
    p->azPattern[p->nPattern++] = z;

    for(i=0; z[i] && z[i]!=delimiter && z[i]!='\n' && z[i]!='\r'; i++){}
    if( delimiter==',' ){
      /* Remove trailing spaces / newlines on a comma-delimited pattern */
      for(j=i; j>1 && (z[j-1]==' ' || z[j-1]=='\n' || z[j-1]=='\r'); j--){}
      if( j<i ) z[j] = 0;
    }
    if( z[i]==0 ) break;
    z[i] = 0;
    z += i+1;
  }
  return p;
}






|



|








|











|
|

>






<


>
|
|
<
<
|







87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

129
130
131
132
133


134
135
136
137
138
139
140
141
struct Glob {
  int nPattern;        /* Number of patterns */
  char **azPattern;    /* Array of pointers to patterns */
};
#endif /* INTERFACE */

/*
** zPatternList is a comma-separated list of glob patterns.  Parse up
** that list and use it to create a new Glob object.
**
** Elements of the glob list may be optionally enclosed in single our
** double-quotes.  This allows a comma to be part of a glob pattern.
**
** Leading and trailing spaces on unquoted glob patterns are ignored.
**
** An empty or null pattern list results in a null glob, which will
** match nothing.
*/
Glob *glob_create(const char *zPatternList){
  int nList;         /* Size of zPatternList in bytes */
  int i;             /* Loop counters */
  Glob *p;           /* The glob being created */
  char *z;           /* Copy of the pattern list */
  char delimiter;    /* '\'' or '\"' or 0 */

  if( zPatternList==0 || zPatternList[0]==0 ) return 0;
  nList = strlen(zPatternList);
  p = fossil_malloc( sizeof(*p) + nList+1 );
  memset(p, 0, sizeof(*p));
  z = (char*)&p[1];
  memcpy(z, zPatternList, nList+1);
  while( z[0] ){
    while( fossil_isspace(z[0]) || z[0]==',' ){
      z++;  /* Skip leading commas, spaces, and newlines */
    }
    if( z[0]==0 ) break;
    if( z[0]=='\'' || z[0]=='"' ){
      delimiter = z[0];
      z++;
    }else{
      delimiter = ',';
    }

    p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)*sizeof(char*) );
    p->azPattern[p->nPattern++] = z;
    /* Find the next delimter (or the end of the string). */
    for(i=0; z[i] && z[i]!=delimiter; i++){
      if( delimiter!=',' ) continue; /* If quoted, keep going. */


      if( fossil_isspace(z[i]) ) break; /* If space, stop. */
    }
    if( z[i]==0 ) break;
    z[i] = 0;
    z += i+1;
  }
  return p;
}
243
244
245
246
247
248
249

250
251




252
253
254
255

256







257
258
259
260
261
262
263
264
265
266
}

/*
** COMMAND: test-glob
**
** Usage:  %fossil test-glob PATTERN STRING...
**

** PATTERN is a comma-separated list of glob patterns.  Show which of
** the STRINGs that follow match the PATTERN.




*/
void glob_test_cmd(void){
  Glob *pGlob;
  int i;

  if( g.argc<4 ) usage("PATTERN STRING ...");







  fossil_print("SQL expression: %s\n", glob_expr("x", g.argv[2]));
  pGlob = glob_create(g.argv[2]);
  for(i=0; i<pGlob->nPattern; i++){
    fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]);
  }
  for(i=3; i<g.argc; i++){
    fossil_print("%d %s\n", glob_match(pGlob, g.argv[i]), g.argv[i]);
  }
  glob_free(pGlob);
}






>
|
|
>
>
>
>




>

>
>
>
>
>
>
>
|
|








246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
}

/*
** COMMAND: test-glob
**
** Usage:  %fossil test-glob PATTERN STRING...
**
** PATTERN is a comma- and whitespace-separated list of optionally
** quoted glob patterns.  Show which of the STRINGs that follow match
** the PATTERN.
**
** If PATTERN begins with "@" the the rest of the pattern is understood
** to be a setting name (such as binary-glob, crln-glob, or encoding-glob)
** and the value of that setting is used as the actually glob pattern.
*/
void glob_test_cmd(void){
  Glob *pGlob;
  int i;
  char *zPattern;
  if( g.argc<4 ) usage("PATTERN STRING ...");
  zPattern = g.argv[2];
  if( zPattern[0]=='@' ){
    db_find_and_open_repository(OPEN_ANY_SCHEMA,0);
    zPattern = db_get(zPattern+1, 0);
    if( zPattern==0 ) fossil_fatal("no such setting: %s", g.argv[2]+1);
    fossil_print("GLOB pattern: %s\n", zPattern);
  }
  fossil_print("SQL expression: %s\n", glob_expr("x", zPattern));
  pGlob = glob_create(zPattern);
  for(i=0; i<pGlob->nPattern; i++){
    fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]);
  }
  for(i=3; i<g.argc; i++){
    fossil_print("%d %s\n", glob_match(pGlob, g.argv[i]), g.argv[i]);
  }
  glob_free(pGlob);
}
Added test/glob.test.














































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#
# Copyright (c) 2013 D. Richard Hipp
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the Simplified BSD License (also
# known as the "2-Clause License" or "FreeBSD License".)
#
# This program is distributed in the hope that it will be useful,
# but without any warranty; without even the implied warranty of
# merchantability or fitness for a particular purpose.
#
# Author contact information:
#   drh@hwaci.com
#   http://www.hwaci.com/drh/
#
############################################################################
#
# Test glob pattern parsing
#

proc glob-parse {testname args} {
  set i 1
  foreach {pattern string result} $args {
    fossil test-glob $pattern $string
    test glob-parse-$testname.$i {$::RESULT eq $result}
    incr i
  }
}

glob-parse 100 test test [string map [list \r\n \n] \
{SQL expression: (x GLOB 'test')
pattern[0] = [test]
1 test}]

glob-parse 101 "one two" one [string map [list \r\n \n] \
{SQL expression: (x GLOB 'one' OR x GLOB 'two')
pattern[0] = [one]
pattern[1] = [two]
1 one}]

glob-parse 102 t* test [string map [list \r\n \n] \
{SQL expression: (x GLOB 't*')
pattern[0] = [t*]
1 test}]

glob-parse 103 "o* two" one [string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*' OR x GLOB 'two')
pattern[0] = [o*]
pattern[1] = [two]
1 one}]

glob-parse 104 {"o* two" "three four"} "one two" [string map [list \r\n \n] \
{SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
pattern[0] = [o* two]
pattern[1] = [three four]
1 one two}]

glob-parse 105 {"o* two" "three four"} "two one" [string map [list \r\n \n] \
{SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
pattern[0] = [o* two]
pattern[1] = [three four]
0 two one}]

glob-parse 106 "\"o*\ntwo\" \"three\nfour\"" "one\ntwo" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*
two' OR x GLOB 'three
four')
pattern[0] = [o*
two]
pattern[1] = [three
four]
1 one
two}]

glob-parse 107 "\"o*\ntwo\" \"three\nfour\"" "two\none" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*
two' OR x GLOB 'three
four')
pattern[0] = [o*
two]
pattern[1] = [three
four]
0 two
one}]

glob-parse 108 "\"o*\rtwo\" \"three\rfour\"" "one\rtwo" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*
two' OR x GLOB 'three
four')
pattern[0] = [o*
two]
pattern[1] = [three
four]
1 one
two}]

glob-parse 109 "\"o*\rtwo\" \"three\rfour\"" "two\rone" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*
two' OR x GLOB 'three
four')
pattern[0] = [o*
two]
pattern[1] = [three
four]
0 two
one}]

glob-parse 110 "'o*\ntwo' 'three\nfour'" "one\ntwo" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*
two' OR x GLOB 'three
four')
pattern[0] = [o*
two]
pattern[1] = [three
four]
1 one
two}]

glob-parse 111 "'o*\ntwo' 'three\nfour'" "two\none" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*
two' OR x GLOB 'three
four')
pattern[0] = [o*
two]
pattern[1] = [three
four]
0 two
one}]

glob-parse 112 "\"'o*' 'two'\" \"'three' 'four'\"" "'one' 'two'" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB '''o*'' ''two''' OR x GLOB '''three'' ''four''')
pattern[0] = ['o*' 'two']
pattern[1] = ['three' 'four']
1 'one' 'two'}]

glob-parse 113 "\"'o*' 'two'\" \"'three' 'four'\"" "two one" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB '''o*'' ''two''' OR x GLOB '''three'' ''four''')
pattern[0] = ['o*' 'two']
pattern[1] = ['three' 'four']
0 two one}]

glob-parse 114 o*,two one [string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*' OR x GLOB 'two')
pattern[0] = [o*]
pattern[1] = [two]
1 one}]

glob-parse 115 "o*,two three,four" "one two" [string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*' OR x GLOB 'two' OR x GLOB 'three' OR x GLOB 'four')
pattern[0] = [o*]
pattern[1] = [two]
pattern[2] = [three]
pattern[3] = [four]
1 one two}]

glob-parse 116 'o*,two' one [string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*,two')
pattern[0] = [o*,two]
0 one}]

glob-parse 117 'o*,two' one,two [string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*,two')
pattern[0] = [o*,two]
1 one,two}]

glob-parse 118 "'o*,two three,four'" "one two three,four" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*,two three,four')
pattern[0] = [o*,two three,four]
0 one two three,four}]

glob-parse 119 "'o*,two three,four'" "one,two three,four" \
[string map [list \r\n \n] \
{SQL expression: (x GLOB 'o*,two three,four')
pattern[0] = [o*,two three,four]
1 one,two three,four}]