Fossil

Check-in [b029ed22]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a problem with initial indent introduced by the previous check-in, so that all regression tests from test/comment.test now succeed. Also eliminate three more calls to fossil_print(). Regarding performance, the legacy comment printing algorithm is outnumbered by factor 2-3, with these changes.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | comment-formatter-utf8
Files: files | file ages | folders
SHA1: b029ed2222b0b803a5b3ccffb3b02b3433bfc017
User & Date: florian 2018-11-15 15:16:00.000
Context
2018-11-15
15:32
Ensure the line buffer for the legacy comment printing algorithm can hold maximum-length UTF-8 sequences. ... (check-in: 29d3a2ed user: florian tags: comment-formatter-utf8)
15:16
Fix a problem with initial indent introduced by the previous check-in, so that all regression tests from test/comment.test now succeed. Also eliminate three more calls to fossil_print(). Regarding performance, the legacy comment printing algorithm is outnumbered by factor 2-3, with these changes. ... (check-in: b029ed22 user: florian tags: comment-formatter-utf8)
12:43
Add output buffering to the (non-legacy) comment printing algorithm, to reduce calls to fossil_print(). The resulting performance improvement can be up to factor 10, with a perceptible difference even for short comments (measured and tested on Windows with MSVC builds, and on Ubuntu with GCC builds). (For comparison: for the legacy comment printing algorithm, the extra UTF-8 checks added by this branch impair performance by 0.12-1.8%, depending on whether the input contains predominantly multi-byte vs. ASCII-only sequences.) ... (check-in: 16fde3ff user: florian tags: comment-formatter-utf8)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/comformat.c.
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  *pMaxChars = COMMENT_LEGACY_LINE_LENGTH - indent;
  return -1;
#endif
}

/*
** This function checks the current line being printed against the original
** comment text.  Upon matching, it emits a new line and updates the provided
** character and line counts, if applicable.
*/
static int comment_check_orig(
  const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
  const char *zLine,     /* [in] The comment line to print. */
  int *pCharCnt,         /* [in/out] Pointer to the line character count. */
  int *pLineCnt          /* [in/out] Pointer to the total line count. */
){
  if( zOrigText && fossil_strcmp(zLine, zOrigText)==0 ){
    fossil_print("\n");
    if( pCharCnt ) *pCharCnt = 0;
    if( pLineCnt ) (*pLineCnt)++;
    return 1;
  }
  return 0;
}








|
|








<







93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109

110
111
112
113
114
115
116
  *pMaxChars = COMMENT_LEGACY_LINE_LENGTH - indent;
  return -1;
#endif
}

/*
** This function checks the current line being printed against the original
** comment text.  Upon matching, it updates the provided character and line
** counts, if applicable.  The caller needs to emit a new line, if desired.
*/
static int comment_check_orig(
  const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
  const char *zLine,     /* [in] The comment line to print. */
  int *pCharCnt,         /* [in/out] Pointer to the line character count. */
  int *pLineCnt          /* [in/out] Pointer to the total line count. */
){
  if( zOrigText && fossil_strcmp(zLine, zOrigText)==0 ){

    if( pCharCnt ) *pCharCnt = 0;
    if( pLineCnt ) (*pLineCnt)++;
    return 1;
  }
  return 0;
}

133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
    }
    nextIndex++;
  }
  return 0; /* NOT REACHED */
}

/*
** This function is called when printing a logical comment line to perform
** the necessary indenting.
*/
static void comment_print_indent(
  const char *zLine, /* [in] The comment line being printed. */
  int indent,        /* [in] Number of spaces to indent, zero for none. */
  int trimCrLf,      /* [in] Non-zero to trim leading/trailing CR/LF. */
  int trimSpace,     /* [in] Non-zero to trim leading/trailing spaces. */
  int *piIndex       /* [in/out] Pointer to first non-space character. */
){
  if( indent>0 ){
    fossil_print("%*s", indent, "");
  }
  if( zLine && piIndex ){
    int index = *piIndex;
    if( trimCrLf ){
      while( zLine[index]=='\r' || zLine[index]=='\n' ){ index++; }
    }
    if( trimSpace ){
      while( fossil_isspace(zLine[index]) ){ index++; }







|
|

|






<
<
<







132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148



149
150
151
152
153
154
155
    }
    nextIndex++;
  }
  return 0; /* NOT REACHED */
}

/*
** This function is called when printing a logical comment line to calculate
** the necessary indenting.  The caller needs to emit the indenting spaces.
*/
static void comment_calc_indent(
  const char *zLine, /* [in] The comment line being printed. */
  int indent,        /* [in] Number of spaces to indent, zero for none. */
  int trimCrLf,      /* [in] Non-zero to trim leading/trailing CR/LF. */
  int trimSpace,     /* [in] Non-zero to trim leading/trailing spaces. */
  int *piIndex       /* [in/out] Pointer to first non-space character. */
){



  if( zLine && piIndex ){
    int index = *piIndex;
    if( trimCrLf ){
      while( zLine[index]=='\r' || zLine[index]=='\n' ){ index++; }
    }
    if( trimSpace ){
      while( fossil_isspace(zLine[index]) ){ index++; }
177
178
179
180
181
182
183
184
185
186
187






188







189
190
191
192
193
194

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213



214
215
216
217
218
219
220
  int trimCrLf,          /* [in] Non-zero to trim leading/trailing CR/LF. */
  int trimSpace,         /* [in] Non-zero to trim leading/trailing spaces. */
  int wordBreak,         /* [in] Non-zero to try breaking on word boundaries. */
  int origBreak,         /* [in] Non-zero to break before original comment. */
  int *pLineCnt,         /* [in/out] Pointer to the total line count. */
  const char **pzLine    /* [out] Pointer to the end of the logical line. */
){
  int index = 0, charCnt = 0, lineCnt = 0, maxChars;
  char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
  if( !zLine ) return;
  if( lineChars<=0 ) return;






  comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index);







  maxChars = lineChars;
  for(;;){
    int useChars = 1;
    char c = zLine[index];
    /* Flush the output buffer if there's no space left for at least one more
    ** (potentially 4-byte) UTF-8 sequence and a terminating NULL. */

    if ( iBuf>sizeof(zBuf)-5 ){
      zBuf[iBuf]=0;
      iBuf=0;
      fossil_print("%s", zBuf);
    }
    if( c==0 ){
      break;
    }else{
      if( origBreak && index>0 ){
        const char *zCurrent = &zLine[index];
        if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
          /* Flush the output buffer before printing the indentation. */
          if ( iBuf>0 ){
            zBuf[iBuf]=0;
            iBuf=0;
            fossil_print("%s", zBuf);
          }
          comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace,
                               &index);



          maxChars = lineChars;
        }
      }
      index++;
    }
    if( c=='\n' ){
      lineCnt++;







|



>
>
>
>
>
>
|
>
>
>
>
>
>
>





|
>
|










<
<
|
<
<
<
|
|
>
>
>







173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215


216



217
218
219
220
221
222
223
224
225
226
227
228
  int trimCrLf,          /* [in] Non-zero to trim leading/trailing CR/LF. */
  int trimSpace,         /* [in] Non-zero to trim leading/trailing spaces. */
  int wordBreak,         /* [in] Non-zero to try breaking on word boundaries. */
  int origBreak,         /* [in] Non-zero to break before original comment. */
  int *pLineCnt,         /* [in/out] Pointer to the total line count. */
  const char **pzLine    /* [out] Pointer to the end of the logical line. */
){
  int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;
  char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
  if( !zLine ) return;
  if( lineChars<=0 ) return;
#if 0
  assert( indent<sizeof(zBuf)-5 );       /* See following comments to explain */
  assert( origIndent<sizeof(zBuf)-5 );   /* these limits. */
#endif
  if ( indent>sizeof(zBuf)-6 )  /* Limit initial indent to fit output buffer. */
    indent = sizeof(zBuf)-6;
  comment_calc_indent(zLine, indent, trimCrLf, trimSpace, &index);
  if ( indent>0 ){
    for ( i=0; i<indent; i++ ){
      zBuf[iBuf++] = ' ';
    }
  }
  if ( origIndent>sizeof(zBuf)-6 ) /* Limit line indent to fit output buffer. */
    origIndent = sizeof(zBuf)-6;
  maxChars = lineChars;
  for(;;){
    int useChars = 1;
    char c = zLine[index];
    /* Flush the output buffer if there's no space left for at least one more
    ** (potentially 4-byte) UTF-8 sequence, one level of indentation spaces,
    ** a new line, and a terminating NULL. */
    if ( iBuf>sizeof(zBuf)-origIndent-6 ){
      zBuf[iBuf]=0;
      iBuf=0;
      fossil_print("%s", zBuf);
    }
    if( c==0 ){
      break;
    }else{
      if( origBreak && index>0 ){
        const char *zCurrent = &zLine[index];
        if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){


          zBuf[iBuf++] = '\n';



          comment_calc_indent(zCurrent, origIndent, trimCrLf, trimSpace,
                              &index);
          for ( i=0; i<origIndent; i++ ){
            zBuf[iBuf++] = ' ';
          }
          maxChars = lineChars;
        }
      }
      index++;
    }
    if( c=='\n' ){
      lineCnt++;