Index: src/checkin.c
==================================================================
--- src/checkin.c
+++ src/checkin.c
@@ -882,46 +882,51 @@
   if( pnFBcard ) *pnFBcard = nFBcard;
 }
 
 /*
 ** Issue a warning and give the user an opportunity to abandon out
-** if unicode or a \r\n line ending is seen in a text file.
+** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
+** is seen in a text file.
 */
-static void encoding_warning(const Blob *p, int crnlOk, const char *zFilename){
-  int looksLike;          /* return value of looks_like_text() */
+static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
+  int eType;              /* return value of looks_like_text() */
+  int fUnicode;           /* return value of starts_with_utf16_bom() */
   char *zMsg;             /* Warning message */
   Blob fname;             /* Relative pathname of the file */
   static int allOk = 0;   /* Set to true to disable this routine */
 
   if( allOk ) return;
-  looksLike = looks_like_text(p);
-  if( looksLike<0 ){
-    const char *type;
+  eType = looks_like_text(p);
+  fUnicode = starts_with_utf16_bom(p);
+  if( eType==-1 || fUnicode ){
+    const char *zWarning;
     Blob ans;
     char cReply;
 
-    if( looksLike&1 ){
+    if( eType==-1 && fUnicode ){
+      zWarning = "Unicode and CR/NL line endings";
+    }else if( eType==-1 ){
       if( crnlOk ){
-        return; /* We don't want CrLf warnings for this file. */
+        return; /* We don't want CR/NL warnings for this file. */
       }
-      type = "CR/NL line endings";
+      zWarning = "CR/NL line endings";
     }else{
-      type = "unicode";
+      zWarning = "Unicode";
     }
     file_relative_name(zFilename, &fname, 0);
     blob_zero(&ans);
     zMsg = mprintf(
          "%s contains %s; commit anyhow (a=all/y/N)?",
-         blob_str(&fname), type);
+         blob_str(&fname), zWarning);
     prompt_user(zMsg, &ans);
     fossil_free(zMsg);
     cReply = blob_str(&ans)[0];
     if( cReply=='a' || cReply=='A' ){
       allOk = 1;
     }else if( cReply!='y' && cReply!='Y' ){
       fossil_fatal("Abandoning commit due to %s in %s",
-                   type, blob_str(&fname));
+                   zWarning, blob_str(&fname));
     }
     blob_reset(&ans);
     blob_reset(&fname);
   }
 }
@@ -1232,11 +1237,11 @@
       /* Instead of file content, put link destination path */
       blob_read_link(&content, zFullname);
     }else{
       blob_read_from_file(&content, zFullname);
     }
-    encoding_warning(&content, crnlOk, zFullname);
+    commit_warning(&content, crnlOk, zFullname);
     if( chnged==1 && contains_merge_marker(&content) ){
       Blob fname; /* Relative pathname of the file */
 
       nConflict++;
       file_relative_name(zFullname, &fname, 0);

Index: src/diff.c
==================================================================
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
     "cannot compute difference between binary files\n"
 
 #define DIFF_CANNOT_COMPUTE_SYMLINK \
     "cannot compute difference between symlink and regular file\n"
 
-#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
+#define looks_like_binary(blob) (looks_like_text((blob)) == 0)
 #endif /* INTERFACE */
 
 /*
 ** Maximum length of a line in a text file.  (8192)
 */
@@ -170,41 +170,46 @@
   *pnLine = nLine;
   return a;
 }
 
 /*
-** Returns 1, if everything OK
-** Returns 0 if the specified content appears to be binary or
-** contains a line that is too long
-** Returns -1, if the file appears text, but it contains CrLf
-** Returns -2, if the file starts with an UTF-16 BOM (le or be)
+** This function attempts to scan each logical line within the blob to
+** determine the type of content it appears to contain.  Possible return
+** values are:
+**
+**  (1) -- The content appears to consist entirely of text, with lines
+**         delimited by line-feed characters; however, the encoding may
+**         not be UTF-8.
+**
+**  (0) -- The content appears to be binary because it contains embedded
+**         NUL (\000) characters or an extremely long line.  Since this
+**         function does not understand UTF-16, it may falsely consider
+**         UTF-16 text to be binary.
+**
+** (-1) -- The content appears to consist entirely of text, with lines
+**         delimited by carriage-return, line-feed pairs; however, the
+**         encoding may not be UTF-8.
+**
 */
 int looks_like_text(const Blob *pContent){
   const char *z = blob_buffer(pContent);
   unsigned int n = blob_size(pContent);
   int j, c;
-  int result = 1;  /* Assume text with no CrLf */
+  int result = 1;  /* Assume text with no CR/NL */
 
   /* Check individual lines.
   */
   if( n==0 ) return result;  /* Empty file -> text */
   c = *z;
   if( c==0 ) return 0;  /* \000 byte in a file -> binary */
-  if ( n > 1 ){
-    if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
-      return -2;
-    } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
-      return -2;
-    }
-  }
   j = (c!='\n');
   while( --n>0 ){
     c = *++z; ++j;
     if( c==0 ) return 0;  /* \000 byte in a file -> binary */
     if( c=='\n' ){
       if( z[-1]=='\r' ){
-        result = -1;  /* Contains CrLf, continue */
+        result = -1;  /* Contains CR/NL, continue */
       }
       if( j>LENGTH_MASK ){
         return 0;  /* Very long line -> binary */
       }
       j = 0;
@@ -213,10 +218,28 @@
   if( j>LENGTH_MASK ){
     return 0;  /* Very long line -> binary */
   }
   return result;  /* No problems seen -> not binary */
 }
+
+/*
+** Return 1 if the blob starts with a UTF-16 byte-order-mark (BOM): 0xff 0xfe
+** for UTF-16le or 0xfe 0xff for UTF-16be.  Return 0 otherwise.
+*/
+int starts_with_utf16_bom(const Blob *pContent){
+  const char *z = blob_buffer(pContent);
+  int c1, c2;
+
+  if( blob_size(pContent)<2 ) return 0;  /* Too short to hold a BOM */
+  c1 = z[0]; c2 = z[1];
+  if( (c1==(char)0xff) && (c2==(char)0xfe) ){
+    return 1;  /* UTF-16le BOM */
+  }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
+    return 1;  /* UTF-16be BOM */
+  }
+  return 0;
+}
 
 /*
 ** Return true if two DLine elements are identical.
 */
 static int same_dline(DLine *pA, DLine *pB){