Fossil

Check-in [13c796a4]
Login

Check-in [13c796a4]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Micro-optimization in lookslike* functions, proving that LOOK_CR can be expressed as (LOOK_LONE_CR | LOOK_CRLF) and LOOK_LF as (LOOK_LONE_LF | LOOK_CRLF). This saves 6 assembler-instructions in the loops. Ready to be reviewed.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | pending-review
Files: files | file ages | folders
SHA1: 13c796a496ba77ebcddc776e636cedc54954c9cf
User & Date: jan.nijtmans 2015-11-12 11:37:02
Context
2015-12-21
08:49
merge trunk ... (check-in: 255327f0 user: jan.nijtmans tags: pending-review)
2015-11-12
11:37
Micro-optimization in lookslike* functions, proving that LOOK_CR can be expressed as (LOOK_LONE_CR | LOOK_CRLF) and LOOK_LF as (LOOK_LONE_LF | LOOK_CRLF). This saves 6 assembler-instructions in the loops. Ready to be reviewed. ... (check-in: 13c796a4 user: jan.nijtmans tags: pending-review)
11:32
re-shuffle LOOK_XXX flags such that LOOK_CR and LOOK_LF are last ... (check-in: b29b7fd3 user: jan.nijtmans tags: pending-review)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/lookslike.c.
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#define LOOK_CRLF    ((int)0x00000008) /* One or more CR/LF pairs were found. */
#define LOOK_LONG    ((int)0x00000010) /* An over length line was found. */
#define LOOK_ODD     ((int)0x00000020) /* An odd number of bytes was found. */
#define LOOK_SHORT   ((int)0x00000040) /* Unable to perform full check. */
#define LOOK_INVALID ((int)0x00000080) /* Invalid sequence was found. */
#define LOOK_BINARY  (LOOK_NUL | LOOK_LONG | LOOK_SHORT) /* May be binary. */
#define LOOK_EOL     (LOOK_LONE_CR | LOOK_LONE_LF | LOOK_CRLF) /* Line seps. */
#define LOOK_CR      ((int)0x00000100) /* One or more CR chars. */
#define LOOK_LF      ((int)0x00000200) /* One or more LF chars. */
#endif /* INTERFACE */


/*
** This function attempts to scan each logical line within the blob to
** determine the type of content it appears to contain.  The return value
** is a combination of one or more of the LOOK_XXX flags (see above):







|
|







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#define LOOK_CRLF    ((int)0x00000008) /* One or more CR/LF pairs were found. */
#define LOOK_LONG    ((int)0x00000010) /* An over length line was found. */
#define LOOK_ODD     ((int)0x00000020) /* An odd number of bytes was found. */
#define LOOK_SHORT   ((int)0x00000040) /* Unable to perform full check. */
#define LOOK_INVALID ((int)0x00000080) /* Invalid sequence was found. */
#define LOOK_BINARY  (LOOK_NUL | LOOK_LONG | LOOK_SHORT) /* May be binary. */
#define LOOK_EOL     (LOOK_LONE_CR | LOOK_LONE_LF | LOOK_CRLF) /* Line seps. */
#define LOOK_CR      (LOOK_LONE_CR | LOOK_CRLF) /* One or more CR chars. */
#define LOOK_LF      (LOOK_LONE_LF | LOOK_CRLF) /* One or more LF chars. */
#endif /* INTERFACE */


/*
** This function attempts to scan each logical line within the blob to
** determine the type of content it appears to contain.  The return value
** is a combination of one or more of the LOOK_XXX flags (see above):
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
  int j, c, flags = LOOK_NONE;  /* Assume UTF-8 text, prove otherwise */

  if( n==0 ) return flags;  /* Empty file -> text */
  c = *z;
  if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){
    flags |= LOOK_CR;
    if( n<=1 || z[1]!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  while( !(flags&stopFlags) && --n>0 ){
    int c2 = c;
    c = *++z; ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\n' ){
      flags |= LOOK_LF;
      if( c2=='\r' ){
        flags |= (LOOK_CR | LOOK_CRLF);  /* Found LF preceded by CR */
      }else{
        flags |= LOOK_LONE_LF;
      }
      if( j>LENGTH_MASK ){
        flags |= LOOK_LONG;  /* Very long line -> binary */
      }
      j = 0;
    }else if( c=='\r' ){
      flags |= LOOK_CR;
      if( n<=1 || z[1]!='\n' ){
        flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
      }
    }
  }
  if( n ){
    flags |= LOOK_SHORT;  /* The whole blob was not examined */







<





|






<

|








<







91
92
93
94
95
96
97

98
99
100
101
102
103
104
105
106
107
108
109

110
111
112
113
114
115
116
117
118
119

120
121
122
123
124
125
126
  int j, c, flags = LOOK_NONE;  /* Assume UTF-8 text, prove otherwise */

  if( n==0 ) return flags;  /* Empty file -> text */
  c = *z;
  if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){

    if( n<=1 || z[1]!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LONE_LF);  /* Found LF as first char */
  while( !(flags&stopFlags) && --n>0 ){
    int c2 = c;
    c = *++z; ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\n' ){

      if( c2=='\r' ){
        flags |= (LOOK_CRLF);  /* Found LF preceded by CR */
      }else{
        flags |= LOOK_LONE_LF;
      }
      if( j>LENGTH_MASK ){
        flags |= LOOK_LONG;  /* Very long line -> binary */
      }
      j = 0;
    }else if( c=='\r' ){

      if( n<=1 || z[1]!='\n' ){
        flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
      }
    }
  }
  if( n ){
    flags |= LOOK_SHORT;  /* The whole blob was not examined */
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
  c = *z;
  if( bReverse ){
    c = UTF16_SWAP(c);
  }
  if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){
    flags |= LOOK_CR;
    if( n<(2*sizeof(WCHAR_T)) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  while( !(flags&stopFlags) && ((n-=sizeof(WCHAR_T))>=sizeof(WCHAR_T)) ){
    int c2 = c;
    c = *++z;
    if( bReverse ){
      c = UTF16_SWAP(c);
    }
    ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\n' ){
      flags |= LOOK_LF;
      if( c2=='\r' ){
        flags |= (LOOK_CR | LOOK_CRLF);  /* Found LF preceded by CR */
      }else{
        flags |= LOOK_LONE_LF;
      }
      if( j>UTF16_LENGTH_MASK ){
        flags |= LOOK_LONG;  /* Very long line -> binary */
      }
      j = 0;
    }else if( c=='\r' ){
      flags |= LOOK_CR;
      if( n<(2*sizeof(WCHAR_T)) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
        flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
      }
    }
  }
  if( n ){
    flags |= LOOK_SHORT;  /* The whole blob was not examined */







<





|










<

|








<







237
238
239
240
241
242
243

244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259

260
261
262
263
264
265
266
267
268
269

270
271
272
273
274
275
276
  c = *z;
  if( bReverse ){
    c = UTF16_SWAP(c);
  }
  if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){

    if( n<(2*sizeof(WCHAR_T)) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LONE_LF);  /* Found LF as first char */
  while( !(flags&stopFlags) && ((n-=sizeof(WCHAR_T))>=sizeof(WCHAR_T)) ){
    int c2 = c;
    c = *++z;
    if( bReverse ){
      c = UTF16_SWAP(c);
    }
    ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\n' ){

      if( c2=='\r' ){
        flags |= (LOOK_CRLF);  /* Found LF preceded by CR */
      }else{
        flags |= LOOK_LONE_LF;
      }
      if( j>UTF16_LENGTH_MASK ){
        flags |= LOOK_LONG;  /* Very long line -> binary */
      }
      j = 0;
    }else if( c=='\r' ){

      if( n<(2*sizeof(WCHAR_T)) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
        flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
      }
    }
  }
  if( n ){
    flags |= LOOK_SHORT;  /* The whole blob was not examined */