Fossil

Check-in [d48399bd]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Do not enforce the requirement that filenames in manifests be strict UTF8 because some bug or another in a legacy version of Fossil allowed in some Latin-1 Supplement characters encoded as a single byte (ex: 0xf3) instead of the correct two-byte encoding (ex: 0xc3 0xb3) and so if we start enforcing strict UTF8, some check-ins from those legacy versions of Fossil will be inaccessible.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:d48399bd3907820a8ebb75644c6e4784271fce9d
User & Date: drh 2012-12-12 22:52:24
Context
2012-12-13
12:31
When determining the interactive user, check the FOSSIL_USER environment variable. Also, add a property to the Windows resource file indicating if markdown support was compiled into the binary. check-in: 743b64ec user: mistachkin tags: trunk
2012-12-12
22:52
Do not enforce the requirement that filenames in manifests be strict UTF8 because some bug or another in a legacy version of Fossil allowed in some Latin-1 Supplement characters encoded as a single byte (ex: 0xf3) instead of the correct two-byte encoding (ex: 0xc3 0xb3) and so if we start enforcing strict UTF8, some check-ins from those legacy versions of Fossil will be inaccessible. check-in: d48399bd user: drh tags: trunk
22:03
typo integrate latest version of dirent.h check-in: 8e50ff0c user: jan.nijtmans tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/add.c.

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
*/
static int add_one_file(
  const char *zPath,   /* Tree-name of file to add. */
  int vid,             /* Add to this VFILE */
  int caseSensitive    /* True if filenames are case sensitive */
){
  const char *zCollate = caseSensitive ? "binary" : "nocase";
  if( !file_is_simple_pathname(zPath) ){
    fossil_warning("filename contains illegal characters: %s", zPath);
    return 0;
  }
  if( db_exists("SELECT 1 FROM vfile"
                " WHERE pathname=%Q COLLATE %s", zPath, zCollate) ){
    db_multi_exec("UPDATE vfile SET deleted=0"
                  " WHERE pathname=%Q COLLATE %s", zPath, zCollate);







|







138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
*/
static int add_one_file(
  const char *zPath,   /* Tree-name of file to add. */
  int vid,             /* Add to this VFILE */
  int caseSensitive    /* True if filenames are case sensitive */
){
  const char *zCollate = caseSensitive ? "binary" : "nocase";
  if( !file_is_simple_pathname(zPath, 1) ){
    fossil_warning("filename contains illegal characters: %s", zPath);
    return 0;
  }
  if( db_exists("SELECT 1 FROM vfile"
                " WHERE pathname=%Q COLLATE %s", zPath, zCollate) ){
    db_multi_exec("UPDATE vfile SET deleted=0"
                  " WHERE pathname=%Q COLLATE %s", zPath, zCollate);

Changes to src/doc.c.

378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
    goto doc_not_found;
  }
  g.zPath = mprintf("%s/%s", g.zPath, zName);
  memcpy(zBaseline, zName, i);
  zBaseline[i] = 0;
  zName += i;
  while( zName[0]=='/' ){ zName++; }
  if( !file_is_simple_pathname(zName) ){
    int n = strlen(zName);
    if( n>0 && zName[n-1]=='/' ){
      zName = mprintf("%sindex.html", zName);
      if( !file_is_simple_pathname(zName) ){
        goto doc_not_found;
      }
    }else{
      goto doc_not_found;
    }
  }
  if( fossil_strcmp(zBaseline,"ckout")==0 && db_open_local()==0 ){







|



|







378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
    goto doc_not_found;
  }
  g.zPath = mprintf("%s/%s", g.zPath, zName);
  memcpy(zBaseline, zName, i);
  zBaseline[i] = 0;
  zName += i;
  while( zName[0]=='/' ){ zName++; }
  if( !file_is_simple_pathname(zName, 1) ){
    int n = strlen(zName);
    if( n>0 && zName[n-1]=='/' ){
      zName = mprintf("%sindex.html", zName);
      if( !file_is_simple_pathname(zName, 1) ){
        goto doc_not_found;
      }
    }else{
      goto doc_not_found;
    }
  }
  if( fossil_strcmp(zBaseline,"ckout")==0 && db_open_local()==0 ){

Changes to src/file.c.

485
486
487
488
489
490
491




492
493
494
495

496
497
498
499
500
501

502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524

525
526
527
528
529
530
531
**
**     *  Does not begin with "/"
**     *  Does not contain any path element named "." or ".."
**     *  Does not contain any of these characters in the path: "\"
**     *  Does not end with "/".
**     *  Does not contain two or more "/" characters in a row.
**     *  Contains at least one character




*/
int file_is_simple_pathname(const char *z){
  int i;
  char c = z[0];

  if( c=='/' || c==0 ) return 0;
  if( c=='.' ){
    if( z[1]=='/' || z[1]==0 ) return 0;
    if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
  }
  for(i=0; (c=z[i])!=0; i++){

    if( (c & 0xf0) == 0xf0 ) {
      /* Unicode characters > U+FFFF are not supported.
       * Windows XP and earlier cannot handle them.
       */
      return 0;
    }
    if( (c & 0xf0) == 0xe0 ) {
      /* This is a 3-byte UTF-8 character */
      if ( (c & 0xfe) == 0xee ){
        /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
        if ( (c & 1) && ((z[i+1] & 0xff) >= 0xa4) ){
          /* But exclude U+F900 - U+FFFF (0xef followed by byte >= 0xa4),
           * which contain valid characters. */
          continue;
        }
        /* Unicode character in the range U+E000 - U+F8FF are for
         * private use, they shouldn't occur in filenames.  */
        return 0;
      }
      if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
        /* Unicode character in the range U+D800 - U+DFFF are for
         * surrogate pairs, they shouldn't occur in filenames. */
        return 0;

      }
    }
    if( c=='\\' ){
      return 0;
    }
    if( c=='/' ){
      if( z[i+1]=='/' ) return 0;







>
>
>
>

|


>






>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>







485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
**
**     *  Does not begin with "/"
**     *  Does not contain any path element named "." or ".."
**     *  Does not contain any of these characters in the path: "\"
**     *  Does not end with "/".
**     *  Does not contain two or more "/" characters in a row.
**     *  Contains at least one character
**
** Invalid UTF8 characters result in a false return if bStrictUtf8 is
** true.  If bStrictUtf8 is false, invalid UTF8 characters are silently
** ignored.
*/
int file_is_simple_pathname(const char *z, int bStrictUtf8){
  int i;
  char c = z[0];
  char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
  if( c=='/' || c==0 ) return 0;
  if( c=='.' ){
    if( z[1]=='/' || z[1]==0 ) return 0;
    if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
  }
  for(i=0; (c=z[i])!=0; i++){
    if( c & maskNonAscii ){
      if( (c & 0xf0) == 0xf0 ) {
        /* Unicode characters > U+FFFF are not supported.
         * Windows XP and earlier cannot handle them.
         */
        return 0;
      }
      if( (c & 0xf0) == 0xe0 ) {
        /* This is a 3-byte UTF-8 character */
        if ( (c & 0xfe) == 0xee ){
          /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
          if ( (c & 1) && ((z[i+1] & 0xff) >= 0xa4) ){
            /* But exclude U+F900 - U+FFFF (0xef followed by byte >= 0xa4),
             * which contain valid characters. */
            continue;
          }
          /* Unicode character in the range U+E000 - U+F8FF are for
           * private use, they shouldn't occur in filenames.  */
          return 0;
        }
        if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
          /* Unicode character in the range U+D800 - U+DFFF are for
           * surrogate pairs, they shouldn't occur in filenames. */
          return 0;
        }
      }
    }
    if( c=='\\' ){
      return 0;
    }
    if( c=='/' ){
      if( z[i+1]=='/' ) return 0;

Changes to src/manifest.c.

428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
...
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
        int nTarget = 0, nSrc = 0;
        zName = next_token(&x, 0);
        zTarget = next_token(&x, &nTarget);
        zSrc = next_token(&x, &nSrc);
        if( zName==0 || zTarget==0 ) goto manifest_syntax_error;      
        if( p->zAttachName!=0 ) goto manifest_syntax_error;
        defossilize(zName);
        if( !file_is_simple_pathname(zName) ){
          SYNTAX("invalid filename on A-card");
        }
        defossilize(zTarget);
        if( (nTarget!=UUID_SIZE || !validate16(zTarget, UUID_SIZE))
           && !wiki_name_is_wellformed((const unsigned char *)zTarget) ){
          SYNTAX("invalid target on A-card");
        }
................................................................................
      ** other control file.  The filename and old-name are fossil-encoded.
      */
      case 'F': {
        char *zName, *zPerm, *zPriorName;
        zName = next_token(&x,0);
        if( zName==0 ) SYNTAX("missing filename on F-card");
        defossilize(zName);
        if( !file_is_simple_pathname(zName) ){
          SYNTAX("F-card filename is not a simple path");
        }
        zUuid = next_token(&x, &sz);
        if( p->zBaseline==0 || zUuid!=0 ){
          if( sz!=UUID_SIZE ) SYNTAX("F-card UUID is the wrong size");
          if( !validate16(zUuid, UUID_SIZE) ) SYNTAX("F-card UUID invalid");
        }
        zPerm = next_token(&x,0);
        zPriorName = next_token(&x,0);
        if( zPriorName ){
          defossilize(zPriorName);
          if( !file_is_simple_pathname(zPriorName) ){
            SYNTAX("F-card old filename is not a simple path");
          }
        }
        if( p->nFile>=p->nFileAlloc ){
          p->nFileAlloc = p->nFileAlloc*2 + 10;
          p->aFile = fossil_realloc(p->aFile, 
                                    p->nFileAlloc*sizeof(p->aFile[0]) );







|







 







|











|







428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
...
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
        int nTarget = 0, nSrc = 0;
        zName = next_token(&x, 0);
        zTarget = next_token(&x, &nTarget);
        zSrc = next_token(&x, &nSrc);
        if( zName==0 || zTarget==0 ) goto manifest_syntax_error;      
        if( p->zAttachName!=0 ) goto manifest_syntax_error;
        defossilize(zName);
        if( !file_is_simple_pathname(zName, 0) ){
          SYNTAX("invalid filename on A-card");
        }
        defossilize(zTarget);
        if( (nTarget!=UUID_SIZE || !validate16(zTarget, UUID_SIZE))
           && !wiki_name_is_wellformed((const unsigned char *)zTarget) ){
          SYNTAX("invalid target on A-card");
        }
................................................................................
      ** other control file.  The filename and old-name are fossil-encoded.
      */
      case 'F': {
        char *zName, *zPerm, *zPriorName;
        zName = next_token(&x,0);
        if( zName==0 ) SYNTAX("missing filename on F-card");
        defossilize(zName);
        if( !file_is_simple_pathname(zName, 0) ){
          SYNTAX("F-card filename is not a simple path");
        }
        zUuid = next_token(&x, &sz);
        if( p->zBaseline==0 || zUuid!=0 ){
          if( sz!=UUID_SIZE ) SYNTAX("F-card UUID is the wrong size");
          if( !validate16(zUuid, UUID_SIZE) ) SYNTAX("F-card UUID invalid");
        }
        zPerm = next_token(&x,0);
        zPriorName = next_token(&x,0);
        if( zPriorName ){
          defossilize(zPriorName);
          if( !file_is_simple_pathname(zPriorName, 0) ){
            SYNTAX("F-card old filename is not a simple path");
          }
        }
        if( p->nFile>=p->nFileAlloc ){
          p->nFileAlloc = p->nFileAlloc*2 + 10;
          p->aFile = fossil_realloc(p->aFile, 
                                    p->nFileAlloc*sizeof(p->aFile[0]) );