Fossil

Check-in [5a66b6e7]
Login

Check-in [5a66b6e7]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add the ability to display content and diffs for UTF16 text files in the web interface.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 5a66b6e7854f985a64324f74ce3a2933a2e4aa39
User & Date: drh 2014-01-14 12:44:59
References
2014-01-15
21:41
Since [5a66b6e785] conversions from Unicode (actually: UTF-16) to UTF-8 are possible on UNIX too ... (check-in: d0d7ca17 user: jan.nijtmans tags: trunk)
Context
2014-01-14
13:39
Handle utf16 text pages in the /doc webpage. ... (check-in: 1c5b51e6 user: drh tags: trunk)
12:44
Add the ability to display content and diffs for UTF16 text files in the web interface. ... (check-in: 5a66b6e7 user: drh tags: trunk)
09:48
Enable use of the TH1_DELETE_INTERP environment variable to delete the TH1 interp on exit. ... (check-in: e0f22dda user: mistachkin tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/blob.c.
221
222
223
224
225
226
227









228
229
230
231
232
233
234
/*
** Initialize a blob to a nul-terminated string.
** Any prior data in the blob is discarded.
*/
void blob_set(Blob *pBlob, const char *zStr){
  blob_init(pBlob, zStr, -1);
}










/*
** Initialize a blob to an empty string.
*/
void blob_zero(Blob *pBlob){
  static const char zEmpty[] = "";
  assert_blob_is_reset(pBlob);







>
>
>
>
>
>
>
>
>







221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
/*
** Initialize a blob to a nul-terminated string.
** Any prior data in the blob is discarded.
*/
void blob_set(Blob *pBlob, const char *zStr){
  blob_init(pBlob, zStr, -1);
}

/*
** Initialize a blob to a nul-terminated string obtained from fossil_malloc().
** The blob will take responsibility for freeing the string.
*/
void blob_set_dynamic(Blob *pBlob, char *zStr){
  blob_init(pBlob, zStr, -1);
  pBlob->xRealloc = blobReallocMalloc;
}

/*
** Initialize a blob to an empty string.
*/
void blob_zero(Blob *pBlob){
  static const char zEmpty[] = "";
  assert_blob_is_reset(pBlob);
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
** done.  If useMbcs is false and there is no BOM, the input string is assumed
** to be UTF-8 already, so no conversion is done.
*/
void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
  char *zUtf8;
  int bomSize = 0;
#if defined(_WIN32) || defined(__CYGWIN__)
  int bomReverse = 0;
#endif
  if( starts_with_utf8_bom(pBlob, &bomSize) ){
    struct Blob temp;
    zUtf8 = blob_str(pBlob) + bomSize;
    blob_zero(&temp);
    blob_append(&temp, zUtf8, -1);
    blob_swap(pBlob, &temp);
    blob_reset(&temp);
#if defined(_WIN32) || defined(__CYGWIN__)
  }else if( starts_with_utf16_bom(pBlob, &bomSize, &bomReverse) ){
    zUtf8 = blob_buffer(pBlob);
    if( bomReverse ){
      /* Found BOM, but with reversed bytes */
      unsigned int i = blob_size(pBlob);
      while( i>0 ){
        /* swap bytes of unicode representation */
        char zTemp = zUtf8[--i];
        zUtf8[i] = zUtf8[i-1];
        zUtf8[--i] = zTemp;
      }
    }
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    zUtf8 = blob_str(pBlob) + bomSize;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    blob_zero(pBlob);
    blob_append(pBlob, zUtf8, -1);
    fossil_unicode_free(zUtf8);
#endif /* _WIN32 ||  __CYGWIN__ */
#if defined(_WIN32)
  }else if( useMbcs ){
    zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
    blob_reset(pBlob);
    blob_append(pBlob, zUtf8, -1);
    fossil_mbcs_free(zUtf8);
#endif /* _WIN32 */
  }
}







<

<







<
















<
|
<
<









1102
1103
1104
1105
1106
1107
1108

1109

1110
1111
1112
1113
1114
1115
1116

1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132

1133


1134
1135
1136
1137
1138
1139
1140
1141
1142
** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
** done.  If useMbcs is false and there is no BOM, the input string is assumed
** to be UTF-8 already, so no conversion is done.
*/
void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
  char *zUtf8;
  int bomSize = 0;

  int bomReverse = 0;

  if( starts_with_utf8_bom(pBlob, &bomSize) ){
    struct Blob temp;
    zUtf8 = blob_str(pBlob) + bomSize;
    blob_zero(&temp);
    blob_append(&temp, zUtf8, -1);
    blob_swap(pBlob, &temp);
    blob_reset(&temp);

  }else if( starts_with_utf16_bom(pBlob, &bomSize, &bomReverse) ){
    zUtf8 = blob_buffer(pBlob);
    if( bomReverse ){
      /* Found BOM, but with reversed bytes */
      unsigned int i = blob_size(pBlob);
      while( i>0 ){
        /* swap bytes of unicode representation */
        char zTemp = zUtf8[--i];
        zUtf8[i] = zUtf8[i-1];
        zUtf8[--i] = zTemp;
      }
    }
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    zUtf8 = blob_str(pBlob) + bomSize;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);

    blob_set_dynamic(pBlob, zUtf8);


#if defined(_WIN32)
  }else if( useMbcs ){
    zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
    blob_reset(pBlob);
    blob_append(pBlob, zUtf8, -1);
    fossil_mbcs_free(zUtf8);
#endif /* _WIN32 */
  }
}
Changes to src/db.c.
313
314
315
316
317
318
319




320
321
322
323
324
325
326
}
int db_bind_double(Stmt *pStmt, const char *zParamName, double rValue){
  return sqlite3_bind_double(pStmt->pStmt, paramIdx(pStmt, zParamName), rValue);
}
int db_bind_text(Stmt *pStmt, const char *zParamName, const char *zValue){
  return sqlite3_bind_text(pStmt->pStmt, paramIdx(pStmt, zParamName), zValue,
                           -1, SQLITE_STATIC);




}
int db_bind_null(Stmt *pStmt, const char *zParamName){
  return sqlite3_bind_null(pStmt->pStmt, paramIdx(pStmt, zParamName));
}
int db_bind_blob(Stmt *pStmt, const char *zParamName, Blob *pBlob){
  return sqlite3_bind_blob(pStmt->pStmt, paramIdx(pStmt, zParamName),
                          blob_buffer(pBlob), blob_size(pBlob), SQLITE_STATIC);







>
>
>
>







313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
}
int db_bind_double(Stmt *pStmt, const char *zParamName, double rValue){
  return sqlite3_bind_double(pStmt->pStmt, paramIdx(pStmt, zParamName), rValue);
}
int db_bind_text(Stmt *pStmt, const char *zParamName, const char *zValue){
  return sqlite3_bind_text(pStmt->pStmt, paramIdx(pStmt, zParamName), zValue,
                           -1, SQLITE_STATIC);
}
int db_bind_text16(Stmt *pStmt, const char *zParamName, const char *zValue){
  return sqlite3_bind_text16(pStmt->pStmt, paramIdx(pStmt, zParamName), zValue,
                             -1, SQLITE_STATIC);
}
int db_bind_null(Stmt *pStmt, const char *zParamName){
  return sqlite3_bind_null(pStmt->pStmt, paramIdx(pStmt, zParamName));
}
int db_bind_blob(Stmt *pStmt, const char *zParamName, Blob *pBlob){
  return sqlite3_bind_blob(pStmt->pStmt, paramIdx(pStmt, zParamName),
                          blob_buffer(pBlob), blob_size(pBlob), SQLITE_STATIC);
Changes to src/diff.c.
1760
1761
1762
1763
1764
1765
1766


1767
1768
1769
1770
1771
1772
1773

  if( diffFlags & DIFF_INVERT ){
    Blob *pTemp = pA_Blob;
    pA_Blob = pB_Blob;
    pB_Blob = pTemp;
  }
  ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;



  /* Prepare the input files */
  memset(&c, 0, sizeof(c));
  c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
                             &c.nFrom, ignoreEolWs);
  c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
                           &c.nTo, ignoreEolWs);







>
>







1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775

  if( diffFlags & DIFF_INVERT ){
    Blob *pTemp = pA_Blob;
    pA_Blob = pB_Blob;
    pB_Blob = pTemp;
  }
  ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;
  blob_to_utf8_no_bom(pA_Blob, 0);
  blob_to_utf8_no_bom(pB_Blob, 0);

  /* Prepare the input files */
  memset(&c, 0, sizeof(c));
  c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
                             &c.nFrom, ignoreEolWs);
  c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
                           &c.nTo, ignoreEolWs);
Changes to src/info.c.
1679
1680
1681
1682
1683
1684
1685

1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
    @ <iframe src="%R/raw/%T(blob_str(&downloadName))?name=%s(zUuid)"
    @   width="100%%" frameborder="0" marginwidth="0" marginheight="0"
    @   sandbox="allow-same-origin"
    @   onload="this.height = this.contentDocument.documentElement.scrollHeight;">
    @ </iframe>
  }else{
    style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);

    zMime = mimetype_from_content(&content);
    @ <blockquote>
    if( zMime==0 ){
      const char *zLn = P("ln");
      const char *z;
      blob_to_utf8_no_bom(&content, 0);
      z = blob_str(&content);
      if( zLn ){
        output_text_with_line_numbers(z, zLn);
      }else{
        @ <pre>
        @ %h(z)
        @ </pre>







>





<







1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691

1692
1693
1694
1695
1696
1697
1698
    @ <iframe src="%R/raw/%T(blob_str(&downloadName))?name=%s(zUuid)"
    @   width="100%%" frameborder="0" marginwidth="0" marginheight="0"
    @   sandbox="allow-same-origin"
    @   onload="this.height = this.contentDocument.documentElement.scrollHeight;">
    @ </iframe>
  }else{
    style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
    blob_to_utf8_no_bom(&content, 0);
    zMime = mimetype_from_content(&content);
    @ <blockquote>
    if( zMime==0 ){
      const char *zLn = P("ln");
      const char *z;

      z = blob_str(&content);
      if( zLn ){
        output_text_with_line_numbers(z, zLn);
      }else{
        @ <pre>
        @ %h(z)
        @ </pre>
Changes to src/utf8.c.
52
53
54
55
56
57
58
59
60
61
62
63
64
65





66


67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84

85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
** Return a pointer to the translated text.
** Call fossil_unicode_free() to deallocate any memory used to store the
** returned pointer when done.
*/
char *fossil_unicode_to_utf8(const void *zUnicode){
#if defined(_WIN32) || defined(__CYGWIN__)
  int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
  char *zUtf = sqlite3_malloc( nByte );
  if( zUtf==0 ){
    return 0;
  }
  WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
  return zUtf;
#else





  return fossil_strdup(zUnicode);  /* TODO: implement for unix */


#endif
}

/*
** Translate UTF-8 to unicode for use in system calls.  Return a pointer to the
** translated text..  Call fossil_unicode_free() to deallocate any memory
** used to store the returned pointer when done.
*/
void *fossil_utf8_to_unicode(const char *zUtf8){
#if defined(_WIN32) || defined(__CYGWIN__)
  int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
  wchar_t *zUnicode = sqlite3_malloc( nByte * 2 );
  if( zUnicode==0 ){
    return 0;
  }
  MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte);
  return zUnicode;
#else

  return fossil_strdup(zUtf8);  /* TODO: implement for unix */
#endif
}

/*
** Deallocate any memory that was previously allocated by
** fossil_unicode_to_utf8().
*/
void fossil_unicode_free(void *pOld){
#if defined(_WIN32) || defined(__CYGWIN__)
  sqlite3_free(pOld);
#else
  fossil_free(pOld);
#endif
}

#if defined(__APPLE__) && !defined(WITHOUT_ICONV)
# include <iconv.h>
#endif

/*







|
<
<
<



>
>
>
>
>
|
>
>











|
<
<
<



>









<
<
<

<







52
53
54
55
56
57
58
59



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82



83
84
85
86
87
88
89
90
91
92
93
94
95



96

97
98
99
100
101
102
103
** Return a pointer to the translated text.
** Call fossil_unicode_free() to deallocate any memory used to store the
** returned pointer when done.
*/
char *fossil_unicode_to_utf8(const void *zUnicode){
#if defined(_WIN32) || defined(__CYGWIN__)
  int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
  char *zUtf = fossil_malloc( nByte );



  WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
  return zUtf;
#else
  static Stmt q;
  char *zUtf8;
  db_static_prepare(&q, "SELECT :utf8");
  db_bind_text16(&q, ":utf8", zUnicode);
  db_step(&q);
  zUtf8 = fossil_strdup(db_column_text(&q, 0));
  db_reset(&q);
  return zUtf8;
#endif
}

/*
** Translate UTF-8 to unicode for use in system calls.  Return a pointer to the
** translated text..  Call fossil_unicode_free() to deallocate any memory
** used to store the returned pointer when done.
*/
void *fossil_utf8_to_unicode(const char *zUtf8){
#if defined(_WIN32) || defined(__CYGWIN__)
  int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
  wchar_t *zUnicode = fossil_malloc( nByte * 2 );



  MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte);
  return zUnicode;
#else
  assert( 0 );  /* Never used in unix */
  return fossil_strdup(zUtf8);  /* TODO: implement for unix */
#endif
}

/*
** Deallocate any memory that was previously allocated by
** fossil_unicode_to_utf8().
*/
void fossil_unicode_free(void *pOld){



  fossil_free(pOld);

}

#if defined(__APPLE__) && !defined(WITHOUT_ICONV)
# include <iconv.h>
#endif

/*