Fossil

Check-in [aadbbb38]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Coding style fixes.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | comment-formatter-utf8
Files: files | file ages | folders
SHA1: aadbbb38d6cd3e7c12f990e15f5ff9549d4f210b
User & Date: florian 2018-11-16 11:26:00
Context
2018-11-16
14:02
More coding style fixes. check-in: 2dca9b82 user: florian tags: comment-formatter-utf8
11:26
Coding style fixes. check-in: aadbbb38 user: florian tags: comment-formatter-utf8
11:14
For better word-breaking results with the (non-legacy) comment printing algorithm, make sure the lookahead to the next space character is UTF-8-aware. Also make sure the per-line remaining character count is decremented properly for UTF-8 sequences. The critical points now handle UTF-8 sequences correctly, and they could be enhanced to work with the effective display width, if required (to handle combining characters and East Asian Wide and Fullwidth characters). check-in: c9ec3d18 user: florian tags: comment-formatter-utf8
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/comformat.c.

125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
...
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
...
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
...
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
  int index,         /* [in] The current character index being handled. */
  int *distUTF8      /* [out] Distance to next space in UTF-8 sequences. */
){
  int nextIndex = index + 1;
  int fNonASCII=0;
  for(;;){
    char c = zLine[nextIndex];
    if ( (c&0x80)==0x80 ) fNonASCII=1;
    if( c==0 || fossil_isspace(c) ){
      if ( distUTF8 ){
        if ( fNonASCII!=0 ){
          *distUTF8 = strlen_utf8(&zLine[index], nextIndex-index);
        }else{
          *distUTF8 = nextIndex-index;
        }
      }
      return nextIndex;
    }
................................................................................
  assert( lengthBytes>=0 );
#endif
  int lengthUTF8=0; /* Counted UTF-8 sequences. */
  int i;
  for( i=0; i<lengthBytes; i++ ){
    char c = zString[i];
    lengthUTF8++;
    if ( (c&0xc0)==0xc0 ){                    /* Any UTF-8 lead byte 11xxxxxx */
      int cchUTF8=1; /* Code units consumed. */
      int maxUTF8=1; /* Expected sequence length. */
      if( (c&0xe0)==0xc0 )maxUTF8=2;          /* UTF-8 lead byte 110vvvvv */
      else if( (c&0xf0)==0xe0 )maxUTF8=3;     /* UTF-8 lead byte 1110vvvv */
      else if( (c&0xf8)==0xf0 )maxUTF8=4;     /* UTF-8 lead byte 11110vvv */
      while( i<lengthBytes-1 &&
              cchUTF8<maxUTF8 &&
................................................................................
  char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
  if( !zLine ) return;
  if( lineChars<=0 ) return;
#if 0
  assert( indent<sizeof(zBuf)-5 );       /* See following comments to explain */
  assert( origIndent<sizeof(zBuf)-5 );   /* these limits. */
#endif
  if ( indent>sizeof(zBuf)-6 )  /* Limit initial indent to fit output buffer. */
    indent = sizeof(zBuf)-6;
  comment_calc_indent(zLine, indent, trimCrLf, trimSpace, &index);
  if ( indent>0 ){
    for ( i=0; i<indent; i++ ){
      zBuf[iBuf++] = ' ';
    }
  }
  if ( origIndent>sizeof(zBuf)-6 ) /* Limit line indent to fit output buffer. */
    origIndent = sizeof(zBuf)-6;
  maxChars = lineChars;
  for(;;){
    int useChars = 1;
    char c = zLine[index];
    /* Flush the output buffer if there's no space left for at least one more
    ** (potentially 4-byte) UTF-8 sequence, one level of indentation spaces,
    ** a new line, and a terminating NULL. */
    if ( iBuf>sizeof(zBuf)-origIndent-6 ){
      zBuf[iBuf]=0;
      iBuf=0;
      fossil_print("%s", zBuf);
    }
    if( c==0 ){
      break;
    }else{
................................................................................
    if( c=='\n' ) break;
  }
  if( charCnt>0 ){
    zBuf[iBuf++] = '\n';
    lineCnt++;
  }
  /* Flush the remaining output buffer. */
  if ( iBuf>0 ) {
    zBuf[iBuf]=0;
    iBuf=0;
    fossil_print("%s", zBuf);
  }
  if( pLineCnt ){
    *pLineCnt += lineCnt;
  }







|

|
|







 







|







 







|


|




|








|







 







|







125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
...
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
...
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
...
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
  int index,         /* [in] The current character index being handled. */
  int *distUTF8      /* [out] Distance to next space in UTF-8 sequences. */
){
  int nextIndex = index + 1;
  int fNonASCII=0;
  for(;;){
    char c = zLine[nextIndex];
    if( (c&0x80)==0x80 ) fNonASCII=1;
    if( c==0 || fossil_isspace(c) ){
      if( distUTF8 ){
        if( fNonASCII!=0 ){
          *distUTF8 = strlen_utf8(&zLine[index], nextIndex-index);
        }else{
          *distUTF8 = nextIndex-index;
        }
      }
      return nextIndex;
    }
................................................................................
  assert( lengthBytes>=0 );
#endif
  int lengthUTF8=0; /* Counted UTF-8 sequences. */
  int i;
  for( i=0; i<lengthBytes; i++ ){
    char c = zString[i];
    lengthUTF8++;
    if( (c&0xc0)==0xc0 ){                     /* Any UTF-8 lead byte 11xxxxxx */
      int cchUTF8=1; /* Code units consumed. */
      int maxUTF8=1; /* Expected sequence length. */
      if( (c&0xe0)==0xc0 )maxUTF8=2;          /* UTF-8 lead byte 110vvvvv */
      else if( (c&0xf0)==0xe0 )maxUTF8=3;     /* UTF-8 lead byte 1110vvvv */
      else if( (c&0xf8)==0xf0 )maxUTF8=4;     /* UTF-8 lead byte 11110vvv */
      while( i<lengthBytes-1 &&
              cchUTF8<maxUTF8 &&
................................................................................
  char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
  if( !zLine ) return;
  if( lineChars<=0 ) return;
#if 0
  assert( indent<sizeof(zBuf)-5 );       /* See following comments to explain */
  assert( origIndent<sizeof(zBuf)-5 );   /* these limits. */
#endif
  if( indent>sizeof(zBuf)-6 )   /* Limit initial indent to fit output buffer. */
    indent = sizeof(zBuf)-6;
  comment_calc_indent(zLine, indent, trimCrLf, trimSpace, &index);
  if( indent>0 ){
    for ( i=0; i<indent; i++ ){
      zBuf[iBuf++] = ' ';
    }
  }
  if( origIndent>sizeof(zBuf)-6 )  /* Limit line indent to fit output buffer. */
    origIndent = sizeof(zBuf)-6;
  maxChars = lineChars;
  for(;;){
    int useChars = 1;
    char c = zLine[index];
    /* Flush the output buffer if there's no space left for at least one more
    ** (potentially 4-byte) UTF-8 sequence, one level of indentation spaces,
    ** a new line, and a terminating NULL. */
    if( iBuf>sizeof(zBuf)-origIndent-6 ){
      zBuf[iBuf]=0;
      iBuf=0;
      fossil_print("%s", zBuf);
    }
    if( c==0 ){
      break;
    }else{
................................................................................
    if( c=='\n' ) break;
  }
  if( charCnt>0 ){
    zBuf[iBuf++] = '\n';
    lineCnt++;
  }
  /* Flush the remaining output buffer. */
  if( iBuf>0 ) {
    zBuf[iBuf]=0;
    iBuf=0;
    fossil_print("%s", zBuf);
  }
  if( pLineCnt ){
    *pLineCnt += lineCnt;
  }