Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Improve truncation of UTF-8 encoded title using a function by @florian.balmer per https://fossil-scm.org/forum/forumpost/6d90d5d99c |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
d076853d10a2f2f7b1812b1db4c45ef5 |
User & Date: | ashepilko 2020-03-06 17:08:26 |
Context
2020-03-06
| ||
17:27 | Limit the max length of new forum thread's title. ... (check-in: 81fb5e76 user: ashepilko tags: trunk) | |
17:08 | Improve truncation of UTF-8 encoded title using a function by @florian.balmer per https://fossil-scm.org/forum/forumpost/6d90d5d99c ... (check-in: d076853d user: ashepilko tags: trunk) | |
10:07 | Update to Unicode-13 ... (check-in: b70a76e3 user: jan.nijtmans tags: trunk) | |
Changes
Changes to src/forum.c.
︙ | ︙ | |||
553 554 555 556 557 558 559 | blob_set(&title, zThreadTitle); /* truncate the title when longer than max allowed; * in case of UTF-8 make sure the truncated string remains valid, * otherwise (different encoding?) pass as-is */ if( mxForumPostTitleLen>0 && blob_size(&title)>mxForumPostTitleLen ){ int len; | | | 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 | blob_set(&title, zThreadTitle); /* truncate the title when longer than max allowed; * in case of UTF-8 make sure the truncated string remains valid, * otherwise (different encoding?) pass as-is */ if( mxForumPostTitleLen>0 && blob_size(&title)>mxForumPostTitleLen ){ int len; len = utf8_codepoint_index(blob_str(&title), mxForumPostTitleLen); if( len ){ blob_truncate(&title, len); blob_append(&title, "...", 3); } } style_header("%s%s", blob_str(&title), blob_size(&title) ? " - Forum" : "Forum"); blob_reset(&title); |
︙ | ︙ |
Changes to src/utf8.c.
︙ | ︙ | |||
/*
** Fallback so that this excerpt is self-contained: the macro is used below
** but its definition lies outside the lines shown here.
** NOTE(review): presumably equivalent to the real definition -- verify.
*/
#ifndef IsUTF8TrailByte
# define IsUTF8TrailByte(c) ( ((c)&0xc0)==0x80 )
#endif

/*
** Starting at byte index maxByteIndex and moving toward the start of zString,
** inspect at most four bytes and return the index of the first byte that is
** not a UTF-8 trail byte.  If no such byte is found within that window (or
** if maxByteIndex is negative), maxByteIndex is returned unchanged.
**
** No check is made against the length of zString; the bytes at the inspected
** indexes are read as-is.
*/
int utf8_nearest_codepoint(const char *zString, int maxByteIndex){
  int i,n;
  for( n=0, i=maxByteIndex; n<4 && i>=0; n++, i-- ){
    if( !IsUTF8TrailByte(zString[i]) ) return i;
  }
  return maxByteIndex;
}

/*
** Find the byte index corresponding to the given code point index in a UTF-8
** string. If the string contains fewer than the given number of code points,
** the index of the end of the string (the null-terminator) is returned.
** Incomplete, ill-formed and overlong sequences are counted as one sequence.
** The invalid lead bytes 0xC0 to 0xC1 and 0xF5 to 0xF7 are allowed to initiate
** (ill-formed) 2- and 4-byte sequences, respectively, the other invalid lead
** bytes 0xF8 to 0xFF are treated as invalid 1-byte sequences (as lone trail
** bytes).
**
** A NULL zString, or an nCodePoint that is zero or negative, yields 0.
**
** Bytes are examined as unsigned char so that the lead/trail classification
** does not depend on whether plain char is signed on the target platform.
*/
int utf8_codepoint_index(const char *zString, int nCodePoint){
  const unsigned char *z = (const unsigned char*)zString;
  int i;          /* Counted bytes. */
  int lenUTF8;    /* Counted UTF-8 sequences. */
  if( z==0 ) return 0;
  for(i=0, lenUTF8=0; z[i]!=0 && lenUTF8<nCodePoint; i++, lenUTF8++){
    unsigned char c = z[i];
    int cchUTF8=1;  /* Code units consumed. */
    int maxUTF8=1;  /* Expected sequence length. */
    if( (c&0xe0)==0xc0 ) maxUTF8=2;       /* UTF-8 lead byte 110vvvvv */
    else if( (c&0xf0)==0xe0 ) maxUTF8=3;  /* UTF-8 lead byte 1110vvvv */
    else if( (c&0xf8)==0xf0 ) maxUTF8=4;  /* UTF-8 lead byte 11110vvv */
    /* Swallow the trail bytes that belong to this sequence.  A NUL or any
    ** other non-trail byte ends the sequence early, so a truncated sequence
    ** still counts as exactly one code point and the scan never runs past
    ** the terminator. */
    while( cchUTF8<maxUTF8
           && (z[i+1]&0xc0)==0x80 ){      /* UTF-8 trail byte 10vvvvvv */
      cchUTF8++;
      i++;
    }
  }
  return i;
}

/*
** Display UTF-8 on the console. Return the number of
** Characters written. If stdout or stderr is redirected
** to a file, -1 is returned and nothing is written
** to the console.
*/
︙ | ︙ |