Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Fixed an edge case in invalid_utf8 where a valid three-byte sequence could be misidentified.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | invalid_utf8_table
Files: files | file ages | folders
SHA1: 314cdab0d49d74237ddca987c3e0faa0870373cd
User & Date: sdr 2016-06-09 23:59:49
Original User & Date: scott 2016-06-09 23:59:49
Context
2016-06-10
08:07
proposed new invalid_utf8 function check-in: e58334a0 user: sdr tags: invalid_utf8_table
2016-06-09
23:59
Fixed an edge case in invalid_utf8 where a valid three-byte sequence could be misidentified. check-in: 314cdab0 user: sdr tags: invalid_utf8_table
15:26
Add mimetypes for bz2 and bzip. check-in: c7e9625d user: drh tags: trunk
Changes

Changes to src/lookslike.c.

159
160
161
162
163
164
165





166





167
168
169
170
171
172
173
    c2 = c;
    c = *++z;
    if( c2>=0x80 ){
      if( ((c2<0xc2) || (c2>=0xf4) || ((c&0xc0)!=0x80)) &&
          (((c2!=0xf4) || (c>=0x90)) && ((c2!=0xc0) || (c!=0x80))) ){
        return LOOK_INVALID; /* Invalid UTF-8 */
      }





      c = (c2 >= 0xe0) ? (c2<<1)+1 : ' ';





    }
  }
  return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */
}


/*







>
>
>
>
>

>
>
>
>
>







159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
    c2 = c;
    c = *++z;
    if( c2>=0x80 ){
      if( ((c2<0xc2) || (c2>=0xf4) || ((c&0xc0)!=0x80)) &&
          (((c2!=0xf4) || (c>=0x90)) && ((c2!=0xc0) || (c!=0x80))) ){
        return LOOK_INVALID; /* Invalid UTF-8 */
      }
      /* the first byte of the sequence is okay
      ** but we need to check the rest
      ** convert next byte to a prefix byte of the next shorter sequence
      ** or a simple space character if the two byte seq was valid
      */
      c = (c2 >= 0xe0) ? (c2<<1)+1 : ' ';
      /* edge case: if three byte sequence started with 0xe0
      ** it becomes 0xc1, which is a too short two byte sequence
      ** so fix it up to be the start of a valid two byte sequence
      */
      if (c == 0xc1) c = 0xc2;
    }
  }
  return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */
}


/*