Fossil

Check-in [02ce8b4a]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Use a Blob object rather than a custom printf function in order to construct the PAX header for tarballs.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | ge-tarfix
Files: files | file ages | folders
SHA1: 02ce8b4a46822a7af941515516299701044750ad
User & Date: drh 2011-07-24 19:47:12
Original Comment: Use a Blob object rather than a custom printf function in order to construct the PAX header for tarballs.
Context
2011-07-25
11:21
Merge the ge-tarfix changes into trunk. This fixes tarball generation for repos that have very long filenames. check-in: a26940c2 user: drh tags: trunk
2011-07-24
19:47
Use a Blob object rather than a custom printf function in order to construct the PAX header for tarballs. Closed-Leaf check-in: 02ce8b4a user: drh tags: ge-tarfix
00:36
Improvements to tar generation. Uses the format documented in Posix.1-2008 to handle long file names and UTF-8. check-in: 2ef37b3b user: ge tags: ge-tarfix
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/tar.c.

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
..
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
...
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
...
212
213
214
215
216
217
218
219
220

221
222
223
224
225
226
227
...
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
...
282
283
284
285
286
287
288
289
290

291
292
293
294
295
296
297

298
299
300
301
302
303
304
...
336
337
338
339
340
341
342
343
344
345
346
347

348
349
350
351
352
353
354

355
356
357
358
359
360
361
...
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
** State information for the tarball builder.
*/
static struct tarball_t {
  unsigned char *aHdr;      /* Space for building headers; the init code
                            ** allocates 512+512 zeroed bytes here */
  char *zSpaces;            /* Padding bytes: points at &aHdr[512], the second
                            ** 512-byte half of the aHdr allocation */
  char *zPrevDir;           /* Name of directory for previous entry */
  int nPrevDirAlloc;        /* size of zPrevDir */
  char *pScratch;           /* scratch buffer used to build PAX data; grown on
                            ** demand by scratch_printf() */
  int nScratchUsed;         /* part of buffer containing data; scratch_printf()
                            ** writes at this offset but does not advance it */
  int nScratchAlloc;        /* size of buffer */
} tball;


/*
** field lengths of 'ustar' name and prefix fields.
*/
#define USTAR_NAME_LEN    100
................................................................................
  tball.aHdr = fossil_malloc(512+512);
  memset(tball.aHdr, 0, 512+512);
  tball.zSpaces = (char*)&tball.aHdr[512];
  /* zPrevDir init */
  tball.zPrevDir = NULL;
  tball.nPrevDirAlloc = 0;
  /* scratch buffer init */
  tball.pScratch = NULL;
  tball.nScratchUsed = 0;
  tball.nScratchAlloc = 0;

  memcpy(&tball.aHdr[108], "0000000", 8);  /* Owner ID */
  memcpy(&tball.aHdr[116], "0000000", 8);  /* Group ID */
  memcpy(&tball.aHdr[257], "ustar\00000", 8);  /* POSIX.1 format */
  memcpy(&tball.aHdr[265], "nobody", 7);   /* Owner name */
  memcpy(&tball.aHdr[297], "nobody", 7);   /* Group name */
  gzip_begin();
  db_multi_exec(
    "CREATE TEMP TABLE dir(name UNIQUE);"
  );
}


/*
** print to the scratch buffer
**
** used to build the Pax Interchange Format data, and create
** pseudo-file names for the header data.
**
** The buffer is grown automatically to accommodate the data.
*/
static int scratch_printf(
  const char *fmt,
  ...
){
  /*
  ** Format at offset tball.nScratchUsed of the scratch buffer and return
  ** the number of characters produced (terminator not counted).  The
  ** caller is responsible for advancing tball.nScratchUsed afterwards.
  ** A negative vsnprintf() result is only caught by the assert().
  */
  while( 1 ){
    va_list ap;
    int nWritten;
    int nWanted;
    int nGrown;
    /* room left behind the data already held in the buffer */
    int nAvail = tball.nScratchAlloc - tball.nScratchUsed;

    va_start(ap, fmt);
    nWritten = vsnprintf(&tball.pScratch[tball.nScratchUsed], nAvail, fmt, ap);
    assert( nWritten>=0 );
    va_end(ap);

    if( nWritten>=nAvail ){
      /* Output plus terminator did not fit: enlarge to twice the current
      ** allocation, or to the exact requirement if that is larger, and
      ** go around again. */
      nWanted = tball.nScratchUsed + nWritten + 1;
      nGrown = 2 * tball.nScratchAlloc;
      if( nGrown<nWanted ) nGrown = nWanted;
      tball.pScratch = fossil_realloc(tball.pScratch, nGrown);
      tball.nScratchAlloc = nGrown;
      continue;
    }
    return nWritten;
  }
}


/*
** verify that all characters in 'zName' are in the
** ISO646 (=ASCII) character set.
*/
static int is_iso646_name(
  const char *zName,     /* file path */
  int nName              /* path length */
){
  /* Examine exactly nName bytes (a NUL byte does not stop the scan).
  ** Any byte above 0x7e -- DEL or a byte with the high bit set --
  ** makes the name fail the check. */
  const unsigned char *z = (const unsigned char*)zName;
  int nLeft;
  for(nLeft = nName; nLeft > 0; nLeft--, z++){
    if( *z >= 0x7f ) return 0;
  }
  return 1;
}


/*
**   copy string pSrc into pDst, truncating or padding with 0 if necessary
................................................................................
  /* only search if the string needs splitting */
  if(nName > USTAR_NAME_LEN){
    for(i = 1; i+1 < nName; i++)
      if(zName[i] == '/'){
        split = i+1;
        /* if the split position is within USTAR_NAME_LEN bytes from
         * the end we can quit */
        if(nName - split <= USTAR_NAME_LEN)
          break;
      }
  }
  return split;
}


/*
................................................................................
  const char *zName,     /* path */
  int nName,             /* path length */
  char *pName,           /* name field */
  char *pPrefix          /* prefix field */
){
  int split = find_split_pos(zName, nName);
  /* check whether both pieces fit */
  if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1)
    return 0; /* no */


  /* extract name */
  padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);

  /* extract prefix */
  padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));

................................................................................
  char *pName,           /* name field */
  char *pPrefix,         /* prefix field */
  int bHeader            /* is this a 'x' type tar header? */
){
  int split;

  /* if this is a Pax Interchange header prepend "PaxHeader/"
   * so we can tell files apart from metadata */
  if(bHeader){
       int n;
       tball.nScratchUsed = 0;
       n = scratch_printf("PaxHeader/%*.*s", nName, nName, zName);
       zName = tball.pScratch;
       nName = n;
  }

  /* find the split position */
  split = find_split_pos(zName, nName);

  /* extract a name, truncate if needed */
  padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
................................................................................
  int next10 = 1;
  int n;
  for(n = blen; n > 0; ){
    blen++; next10 *= 10;
    n /= 10;
  }
  /* adding the length extended the length field? */
  if(blen > next10)
    blen++;

  /* build the string */
  n = scratch_printf("%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
  /* this _must_ be right */
  if(n != blen)
    fossil_fatal("internal error: PAX tar header has bad length");
  /* add length to scratch buffer */
  tball.nScratchUsed += blen;

}


/*
** set the header type, calculate the checksum and output
** the header
*/
................................................................................
  if( !is_iso646_name(zName, nName) ||
            !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
    int lastPage;
    /* add a file name for interoperability with older programs */
    approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);

    /* generate the Pax Interchange path header */
    tball.nScratchUsed = 0;
    add_pax_header("path", zName, nName);

    /* set the header length, and write the header */
    sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", tball.nScratchUsed);

    cksum_and_write_header('x');

    /* write the Pax Interchange data */
    gzip_step(tball.pScratch, tball.nScratchUsed);
    lastPage = tball.nScratchUsed % 512;
    if( lastPage!=0 )
      gzip_step(tball.zSpaces, 512 - lastPage);


    /* generate an approximate path for the regular header */
    approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
  }
  /* set the size */
  sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);

................................................................................
  gzip_step(tball.zSpaces, 512);
  gzip_finish(pOut);
  fossil_free(tball.aHdr);
  tball.aHdr = 0;
  fossil_free(tball.zPrevDir);
  tball.zPrevDir = NULL;
  tball.nPrevDirAlloc = 0;
  fossil_free(tball.pScratch);
  tball.pScratch = NULL;
  tball.nScratchUsed = 0;
  tball.nScratchAlloc = 0;
}


/*
** COMMAND: test-tarball
**
** Generate a GZIP-compressed tarball in the file given by the first argument







|
<
<







 







|
<
<












<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<












<
|







 







|
<







 







|

>







 







|
|
|
|
|
|
|







 







|

>

|

|

<
<
>







 







|



|
>



|
|
|

>







 







|
<
<
<







26
27
28
29
30
31
32
33


34
35
36
37
38
39
40
..
51
52
53
54
55
56
57
58


59
60
61
62
63
64
65
66
67
68
69
70






































71
72
73
74
75
76
77
78
79
80
81
82

83
84
85
86
87
88
89
90
...
149
150
151
152
153
154
155
156

157
158
159
160
161
162
163
...
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
...
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
...
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253


254
255
256
257
258
259
260
261
...
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
...
388
389
390
391
392
393
394
395



396
397
398
399
400
401
402
** State information for the tarball builder.
*/
static struct tarball_t {
  unsigned char *aHdr;      /* Space for building headers; the init code
                            ** allocates 512+512 zeroed bytes here */
  char *zSpaces;            /* Padding bytes: points at &aHdr[512], the second
                            ** 512-byte half of the aHdr allocation */
  char *zPrevDir;           /* Name of directory for previous entry */
  int nPrevDirAlloc;        /* size of zPrevDir */
  Blob pax;                 /* PAX data; initialized with blob_zero(), filled
                            ** with blob_appendf(), cleared with blob_reset() */


} tball;


/*
** field lengths of 'ustar' name and prefix fields.
*/
#define USTAR_NAME_LEN    100
................................................................................
  tball.aHdr = fossil_malloc(512+512);
  memset(tball.aHdr, 0, 512+512);
  tball.zSpaces = (char*)&tball.aHdr[512];
  /* zPrevDir init */
  tball.zPrevDir = NULL;
  tball.nPrevDirAlloc = 0;
  /* scratch buffer init */
  blob_zero(&tball.pax);



  memcpy(&tball.aHdr[108], "0000000", 8);  /* Owner ID */
  memcpy(&tball.aHdr[116], "0000000", 8);  /* Group ID */
  memcpy(&tball.aHdr[257], "ustar\00000", 8);  /* POSIX.1 format */
  memcpy(&tball.aHdr[265], "nobody", 7);   /* Owner name */
  memcpy(&tball.aHdr[297], "nobody", 7);   /* Group name */
  gzip_begin();
  db_multi_exec(
    "CREATE TEMP TABLE dir(name UNIQUE);"
  );
}








































/*
** verify that all characters in 'zName' are in the
** ISO646 (=ASCII) character set.
*/
static int is_iso646_name(
  const char *zName,     /* file path */
  int nName              /* path length */
){
  /* Walk the first nName bytes of the path; the length, not a NUL
  ** terminator, bounds the walk.  The name passes only if every byte
  ** is 0x7e or lower. */
  int iByte = 0;
  while( iByte<nName ){
    if( (unsigned char)zName[iByte] > 0x7e ){
      return 0;
    }
    iByte++;
  }
  return 1;
}


/*
**   copy string pSrc into pDst, truncating or padding with 0 if necessary
................................................................................
  /* only search if the string needs splitting */
  if(nName > USTAR_NAME_LEN){
    for(i = 1; i+1 < nName; i++)
      if(zName[i] == '/'){
        split = i+1;
        /* if the split position is within USTAR_NAME_LEN bytes from
         * the end we can quit */
        if(nName - split <= USTAR_NAME_LEN) break;

      }
  }
  return split;
}


/*
................................................................................
  const char *zName,     /* path */
  int nName,             /* path length */
  char *pName,           /* name field */
  char *pPrefix          /* prefix field */
){
  int split = find_split_pos(zName, nName);
  /* check whether both pieces fit */
  if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){
    return 0; /* no */
  }

  /* extract name */
  padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);

  /* extract prefix */
  padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));

................................................................................
  char *pName,           /* name field */
  char *pPrefix,         /* prefix field */
  int bHeader            /* is this a 'x' type tar header? */
){
  int split;

  /* if this is a Pax Interchange header prepend "PaxHeader/"
  ** so we can tell files apart from metadata */
  if( bHeader ){
    int n;
    blob_reset(&tball.pax);
    blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName);
    zName = blob_buffer(&tball.pax);
    nName = blob_size(&tball.pax);
  }

  /* find the split position */
  split = find_split_pos(zName, nName);

  /* extract a name, truncate if needed */
  padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
................................................................................
  int next10 = 1;
  int n;
  for(n = blen; n > 0; ){
    blen++; next10 *= 10;
    n /= 10;
  }
  /* adding the length extended the length field? */
  if(blen > next10){
    blen++;
  }
  /* build the string */
  blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
  /* this _must_ be right */
  if(blob_size(&tball.pax) != blen){
    fossil_fatal("internal error: PAX tar header has bad length");


  }
}


/*
** set the header type, calculate the checksum and output
** the header
*/
................................................................................
  if( !is_iso646_name(zName, nName) ||
            !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
    int lastPage;
    /* add a file name for interoperability with older programs */
    approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);

    /* generate the Pax Interchange path header */
    blob_reset(&tball.pax);
    add_pax_header("path", zName, nName);

    /* set the header length, and write the header */
    sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o",
                     blob_size(&tball.pax));
    cksum_and_write_header('x');

    /* write the Pax Interchange data */
    gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax));
    lastPage = blob_size(&tball.pax) % 512;
    if( lastPage!=0 ){
      gzip_step(tball.zSpaces, 512 - lastPage);
    }

    /* generate an approximate path for the regular header */
    approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
  }
  /* set the size */
  sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);

................................................................................
  gzip_step(tball.zSpaces, 512);
  gzip_finish(pOut);
  fossil_free(tball.aHdr);
  tball.aHdr = 0;
  fossil_free(tball.zPrevDir);
  tball.zPrevDir = NULL;
  tball.nPrevDirAlloc = 0;
  blob_reset(&tball.pax);



}


/*
** COMMAND: test-tarball
**
** Generate a GZIP-compressed tarball in the file given by the first argument