Login
Artifact [2908faa724]
Login

Artifact 2908faa72474e5f9242973fb8aa3c68f84de6f31:


/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 
/* vim: set ts=2 et sw=2 tw=80: */
#if !defined(NET_FOSSIL_SCM_FSL_HASH_H_INCLUDED)
#define NET_FOSSIL_SCM_FSL_HASH_H_INCLUDED
/*
  Copyright 2013-2021 Stephan Beal (https://wanderinghorse.net).

  Derived heavily from previous work:

  Copyright (c) 2013 D. Richard Hipp (https://www.hwaci.com/drh/)


  This program is free software; you can redistribute it and/or
  modify it under the terms of the Simplified BSD License (also
  known as the "2-Clause License" or "FreeBSD License".)

  This program is distributed in the hope that it will be useful,
  but without any warranty; without even the implied warranty of
  merchantability or fitness for a particular purpose.

  *****************************************************************************
  This file declares public APIs relating to generating and validating
  hash values.
*/

#include "fossil-util.h" /* MUST come first b/c of config macros */
/*
  FSL_SHA1_HARDENED is a compile-time switch. When it is non-zero,
  struct fsl_sha1_cx uses its "hardened" layout, the
  fsl_sha1h_collision_callback type is declared, and fsl_sha1_final()
  is documented as able to report a detected collision. It defaults to
  1 here unless it was already defined before this point (presumably
  by the configuration pulled in via fossil-util.h - confirm).
*/
#if !defined(FSL_SHA1_HARDENED)
#  define FSL_SHA1_HARDENED 1
#endif
#if defined(__cplusplus)
extern "C" {
#endif

/**
   Fixed size constants shared by the hashing API. Each value is the
   length, in bytes, of a hex-encoded hash string, not counting any
   terminating NUL byte.
*/
enum fsl_hash_constants {
  /** Byte length of fossil's hex-form SHA1 UUID strings. */
  FSL_STRLEN_SHA1 = 40,
  /** Byte length of fossil's hex-form SHA3-256 UUID strings. */
  FSL_STRLEN_K256 = 64,
  /** Byte length of a hex-form MD5 hash. */
  FSL_STRLEN_MD5 = 32,

  /** Shortest length a full (non-prefix) UUID may have. */
  FSL_UUID_STRLEN_MIN = FSL_STRLEN_SHA1,
  /** Longest length a full (non-prefix) UUID may have. */
  FSL_UUID_STRLEN_MAX = FSL_STRLEN_K256
};

/**
   Unique IDs for the artifact hash types the library works with.
*/
typedef enum fsl_hash_types_t {
  /** Not a valid hash type; doubles as the "no match" result. */
  FSL_HTYPE_ERROR = 0,
  /** SHA1. */
  FSL_HTYPE_SHA1 = 1,
  /** SHA3-256. */
  FSL_HTYPE_K256 = 2
} fsl_hash_types_t;

/*
  Convenience typedefs so that the hashing context types can be named
  without the "struct" keyword. The structs themselves are defined
  further down in this header.
*/
typedef struct fsl_md5_cx fsl_md5_cx;
typedef struct fsl_sha1_cx fsl_sha1_cx;
typedef struct fsl_sha3_cx fsl_sha3_cx;

/**
   The hex-form hash string of the initial MD5 state, i.e. the MD5 of
   empty input. Used as an optimization in places where an MD5 is
   needed but it is known that no data will be hashed.

   It is FSL_STRLEN_MD5 (32) characters long and equivalent to what
   the md5sum command outputs for empty input:

   @code
   # md5sum < /dev/null
   d41d8cd98f00b204e9800998ecf8427e  -
   @endcode
*/
#define FSL_MD5_INITIAL_HASH "d41d8cd98f00b204e9800998ecf8427e"

/**
   State for an incremental MD5 calculation. Typical usage:

   @code
   unsigned char digest[16];
   char hex[FSL_STRLEN_MD5+1];
   fsl_md5_cx cx = fsl_md5_cx_empty;
   // alternately: fsl_md5_init(&cx);
   ...call fsl_md5_update(&cx,...) any number of times to
   ...incrementally calculate the hash.
   fsl_md5_final(&cx, digest); // ends the calculation
   fsl_md5_digest_to_base16(digest, hex);
   // digest now contains the raw 16-byte MD5 digest.
   // hex now contains the 32-byte MD5 + a trailing NUL
   @endcode
*/
struct fsl_md5_cx {
  /* Set to 1 by fsl_md5_cx_empty_m. NOTE(review): how the
     implementation uses this flag is not visible in this header. */
  int isInit;
  /* Hash state words, seeded with the four initial values listed in
     fsl_md5_cx_empty_m. */
  uint32_t buf[4];
  /* Presumably the running input-length counter split across two
     32-bit words - confirm against the implementation. */
  uint32_t bits[2];
  /* Presumably staging space for one 64-byte input block - confirm
     against the implementation. */
  unsigned char in[64];
};

/**
   Initializer for a fsl_md5_cx in its empty (initial) state, intended
   for use where a struct-style initializer is needed. Members are
   listed positionally in declaration order; the 64-byte input buffer
   is zero-filled.
*/
#define fsl_md5_cx_empty_m {                                    \
    1 /*isInit*/,                                               \
    { /*buf*/                                                   \
      0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476            \
    },                                                          \
    { /*bits*/ 0, 0 },                                          \
    { /*in: remaining elements are implicitly zeroed*/ 0 }      \
  }

/**
   A shared, immutable fsl_md5_cx instance which holds the initial
   state used for MD5 calculations. Every fsl_md5_cx must either be
   copy-initialized from this instance or be passed to fsl_md5_init()
   before it is used.

   @see fsl_md5_init()
*/
FSL_EXPORT const fsl_md5_cx fsl_md5_cx_empty;

/**
   Initializes the given MD5 context. This must be the first routine
   called on any fsl_md5_cx instance. Alternately, copy-initializing
   the instance from fsl_md5_cx_empty has the same effect.

   @param cx The context to initialize. Must not be NULL.

   @see fsl_md5_update()
   @see fsl_md5_final()
*/
FSL_EXPORT void fsl_md5_init(fsl_md5_cx *cx);

/**
   Updates cx's state to reflect the addition of the data specified by
   the half-open byte range [buf, buf+len). This may be called an
   arbitrary number of times between fsl_md5_init() and
   fsl_md5_final().

   @param cx The context to update. Must not be NULL.
   @param buf The bytes to hash. Must not be NULL.
   @param len The number of bytes to read from buf.

   @see fsl_md5_init()
   @see fsl_md5_final()
*/
FSL_EXPORT void fsl_md5_update(fsl_md5_cx *cx, void const * buf, fsl_size_t len);

/**
   Finishes up the calculation of the MD5 for the given context and
   writes a 16-byte digest value to the 2nd parameter. Use
   fsl_md5_digest_to_base16() to convert the digest output value to
   hexadecimal form.

   @param cx The context whose calculation is being completed.
   @param digest Output buffer which receives the 16 raw digest
   bytes, so it must be at least 16 bytes long.

   @see fsl_md5_init()
   @see fsl_md5_update()
   @see fsl_md5_digest_to_base16()
*/
FSL_EXPORT void fsl_md5_final(fsl_md5_cx * cx, unsigned char * digest);

/**
   Converts an MD5 digest value (from fsl_md5_final()'s 2nd
   parameter) to a 32-byte (FSL_STRLEN_MD5) hexadecimal string plus a
   terminating NUL byte.

   @param digest The raw 16-byte digest to convert.
   @param zBuf Output buffer. Must be at least (FSL_STRLEN_MD5+1)
   bytes long.

   @see fsl_md5_final()
*/
FSL_EXPORT void fsl_md5_digest_to_base16(unsigned char *digest, char *zBuf);

/**
   The MD5 counterpart of fsl_sha1sum_buffer(), identical in
   semantics except that its result is an MD5 hash instead of an
   SHA1 hash and the resulting hex string is FSL_STRLEN_MD5 bytes
   long plus a terminating NUL.

   @param pIn The buffer whose contents get hashed.
   @param pCksum The buffer which receives the hex-form hash.

   @see fsl_sha1sum_buffer()
*/
FSL_EXPORT int fsl_md5sum_buffer(fsl_buffer const *pIn, fsl_buffer *pCksum);

/**
   The MD5 counterpart of fsl_sha1sum_cstr(), identical in
   semantics except that its result is an MD5 hash instead of an
   SHA1 hash and the resulting string is FSL_STRLEN_MD5 bytes long
   plus a terminating NUL.

   As documented for fsl_sha1sum_cstr(), a negative len means that
   zIn's length is determined via fsl_strlen(), and the returned
   string must eventually be passed to fsl_free().

   @see fsl_sha1sum_cstr()
*/
FSL_EXPORT char *fsl_md5sum_cstr(const char *zIn, fsl_int_t len);

/**
   The MD5 counterpart of fsl_sha1sum_stream(), with identical
   semantics except that the generated hash is an MD5 string
   instead of SHA1.

   @param src The input function from which all data is consumed.
   @param srcState Opaque state passed on to src.
   @param pCksum The buffer which receives the hex-form hash.

   @see fsl_sha1sum_stream()
*/
FSL_EXPORT int fsl_md5sum_stream(fsl_input_f src, void * srcState, fsl_buffer *pCksum);

/**
   Reads all input from src() and passes it through
   fsl_md5_update(cx,...).

   @param cx The MD5 context to update.
   @param src The input function to read from.
   @param srcState Opaque state passed on to src.

   @return 0 on success, FSL_RC_MISUSE if !cx or !src. If src returns
   a non-0 code, that code is returned from here.
*/
FSL_EXPORT int fsl_md5_update_stream(fsl_md5_cx *cx, fsl_input_f src, void * srcState);

/**
   Convenience form of fsl_md5_update() for buffers: equivalent to
   fsl_md5_update(cx, b->mem, b->used). Results are undefined if
   either pointer is invalid or NULL.
*/
FSL_EXPORT void fsl_md5_update_buffer(fsl_md5_cx *cx, fsl_buffer const * b);

/**
   Passes the first len bytes of str to fsl_md5_update(cx). Results
   are undefined if either pointer is invalid or NULL.

   @param cx The MD5 context to update.
   @param str The string to hash.
   @param len The number of bytes of str to hash. If it is less than
   0 then fsl_strlen() is used to calculate the length.

   This is a no-op if !len or (len<0 && !*str).
*/
FSL_EXPORT void fsl_md5_update_cstr(fsl_md5_cx *cx, char const * str, fsl_int_t len);

/**
   A fsl_md5_update_stream() proxy which updates cx to include the
   contents of the given file.

   NOTE(review): the result codes are not documented here; presumably
   it returns 0 on success and otherwise propagates the result of
   opening the file or of fsl_md5_update_stream() - confirm against
   the implementation.
*/
FSL_EXPORT int fsl_md5_update_filename(fsl_md5_cx *cx, char const * fname);

/**
   The MD5 counterpart of fsl_sha1sum_filename(), with identical
   semantics except that the generated hash is an MD5 string
   instead of SHA1.

   @param zFilename The name of the file to hash.
   @param pCksum The buffer which receives the hex-form hash.

   @see fsl_sha1sum_filename()
*/
FSL_EXPORT int fsl_md5sum_filename(const char *zFilename, fsl_buffer *pCksum);


#if FSL_SHA1_HARDENED
/**
   Function type of the fsl_sha1_cx::callback member, which only
   exists in hardened-SHA1 builds.

   NOTE(review): this header documents neither when the callback is
   invoked nor what its five arguments mean; presumably it is called
   by the collision-detection code when a collision is found -
   confirm against the implementation.
*/
typedef void(*fsl_sha1h_collision_callback)(uint64_t, const uint32_t*, const uint32_t*, const uint32_t*, const uint32_t*);
#endif
/**
   Holds state for SHA1 calculations. It is intended to be used
   like this:

   @code
   unsigned char digest[20];
   char hex[FSL_STRLEN_SHA1+1];
   fsl_sha1_cx cx = fsl_sha1_cx_empty;
   fsl_sha1_init(&cx); // required regardless of the copy above
   ...call fsl_sha1_update(&cx,...) any number of times to
   ...incrementally calculate the hash.
   fsl_sha1_final(&cx, digest); // ends the calculation
   fsl_sha1_digest_to_base16(digest, hex);
   // digest now contains the raw 20-byte SHA1 digest.
   // hex now contains the 40-byte SHA1 + a trailing NUL
   @endcode

   The layout depends on the FSL_SHA1_HARDENED build option, so
   client code should treat the members as private and go through
   the fsl_sha1_xxx() functions.

   NOTE(review): the hardened member set looks like the context of
   the "sha1dc" SHA1 collision-detection code - confirm against the
   implementation before documenting individual members.
*/
struct fsl_sha1_cx {
#if FSL_SHA1_HARDENED
  uint64_t total;
  uint32_t ihv[5];
  unsigned char buffer[64];
  int bigendian;
  int found_collision;
  int safe_hash;
  int detect_coll;
  int ubc_check;
  int reduced_round_coll;
  fsl_sha1h_collision_callback callback;

  uint32_t ihv1[5];
  uint32_t ihv2[5];
  uint32_t m1[80];
  uint32_t m2[80];
  uint32_t states[80][5];
#else
  unsigned int state[5];
  unsigned int count[2];
  unsigned char buffer[64];
#endif
};
/**
   fsl_sha1_cx initializer intended for in-struct copy initialization.

   In hardened builds (FSL_SHA1_HARDENED) this zero-initializes the
   whole context. In non-hardened builds it sets the five state words
   to the values listed below and zeroes count and buffer. Because
   the two variants differ, portable code must not rely on this macro
   alone and must pass the context to fsl_sha1_init() before use.
*/
#if FSL_SHA1_HARDENED
#define fsl_sha1_cx_empty_m {0}
#else
#define fsl_sha1_cx_empty_m {                                           \
    {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 },      \
    {0,0},                                                              \
    {0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, \
        0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0 \
        }                                                               \
  }
#endif
/**
   A shared, immutable fsl_sha1_cx instance intended for copy
   initialization. For build config portability (see
   FSL_SHA1_HARDENED), a copied object must still be passed to
   fsl_sha1_init() to initialize it.

   @see fsl_sha1_init()
*/
FSL_EXPORT const fsl_sha1_cx fsl_sha1_cx_empty;

/**
   Initializes the given context with the initial SHA1 state. This
   must be the first routine called on an SHA1 context, and passing
   a context to other SHA1 routines without first having passed it
   to this one will lead to undefined results.

   @param context The context to initialize.

   @see fsl_sha1_update()
   @see fsl_sha1_final()
*/
FSL_EXPORT void fsl_sha1_init(fsl_sha1_cx *context);

/**
   Updates the given context to include the hash of the first len
   bytes of the given data. May be called repeatedly to hash data
   incrementally (see the usage example in the fsl_sha1_cx docs).

   @param context The context to update. It must have been
   initialized via fsl_sha1_init().
   @param data The bytes to hash.
   @param len The number of bytes to read from data.

   @see fsl_sha1_init()
   @see fsl_sha1_final()
*/
FSL_EXPORT void fsl_sha1_update( fsl_sha1_cx *context, void const *data, fsl_size_t len);

/**
   Adds padding and finalizes the message digest.

   @param context The context whose calculation is being completed.
   @param digest If not NULL, 20 bytes of digest are written here, so
   it must be at least 20 bytes long.

   @return If this library is configured with hardened SHA1 hashes
   (FSL_SHA1_HARDENED), non-0 if a collision was detected while
   hashing. If it is not configured for hardened SHA1, or no
   collision was detected, 0.

   @see fsl_sha1_update()
   @see fsl_sha1_digest_to_base16()
*/
FSL_EXPORT int fsl_sha1_final(fsl_sha1_cx *context, unsigned char * digest);

/**
   A convenience form of fsl_sha1_final() which produces the hash in
   hex-string form instead of as a raw digest.

   @param context The context whose calculation is being completed.
   @param zHex Output buffer to which FSL_STRLEN_SHA1+1 bytes (hash
   plus terminating NUL byte) are written.

   @return A (const char *)-type cast of zHex.
*/
FSL_EXPORT const char * fsl_sha1_final_hex(fsl_sha1_cx *context, char * zHex);

/**
   Converts a raw SHA1 digest into base-16 (hexadecimal) form.

   @param digest Must be at least 20 bytes long and hold an SHA1
   digest.
   @param zBuf Output buffer. Must be at least (FSL_STRLEN_SHA1 + 1)
   bytes long; FSL_STRLEN_SHA1 characters of hexadecimal-form SHA1
   hash and 1 NUL byte will be written to it.

   @see fsl_sha1_final()
*/
FSL_EXPORT void fsl_sha1_digest_to_base16(unsigned char *digest, char *zBuf);

/**
   Computes the SHA1 checksum of pIn and stores the resulting
   checksum in the buffer pCksum.

   @param pIn The buffer whose contents get hashed.
   @param pCksum Receives the checksum. Its memory is re-used if it
   has any allocated to it. pCksum may == pIn, in which case this is
   a destructive operation (replacing the hashed data with its hash
   code).

   @return 0 on success, FSL_RC_OOM if (re)allocating pCksum fails.
*/
FSL_EXPORT int fsl_sha1sum_buffer(fsl_buffer const *pIn, fsl_buffer *pCksum);

/**
   Computes the SHA1 checksum of the first len bytes of the given
   string.

   @param zIn The string to hash.
   @param len The number of bytes of zIn to hash. If it is negative
   then zIn must be NUL-terminated and fsl_strlen() is used to find
   its length.

   @return A FSL_STRLEN_SHA1-byte string (+NUL byte) in memory
   obtained from fsl_malloc(), so it must be passed to fsl_free() to
   free it. If NULL==zIn or !len then NULL is returned.
*/
FSL_EXPORT char *fsl_sha1sum_cstr(const char *zIn, fsl_int_t len);

/**
   Consumes all input from src and calculates its SHA1 hash. It keeps
   consuming input from src() until that function reads fewer bytes
   than requested, at which point EOF is assumed.

   @param src The input function to read from.
   @param srcState Opaque state passed on to src.
   @param pCksum Receives the result. Its contents, if any, are
   overwritten, not appended to.

   @return 0 on success, FSL_RC_MISUSE if !src or !pCksum. If src()
   returns non-0, that code is returned from this function.
*/
FSL_EXPORT int fsl_sha1sum_stream(fsl_input_f src, void * srcState, fsl_buffer *pCksum);


/**
   A fsl_sha1sum_stream() wrapper which calculates the SHA1 of the
   given file.

   @param zFilename The name of the file to hash.
   @param pCksum Receives the result, as for fsl_sha1sum_stream().

   @return FSL_RC_IO if the file cannot be opened, FSL_RC_MISUSE if
   !zFilename or !pCksum, else as per fsl_sha1sum_stream().

   TODO: the v1 impl has special behaviour for symlinks which this
   function lacks. For that support we need a variant of this
   function which takes a fsl_cx parameter (for the allow-symlinks
   setting).
*/
FSL_EXPORT int fsl_sha1sum_filename(const char *zFilename, fsl_buffer *pCksum);

/**
   Legal SHA3 hash sizes, in bits: every multiple of 32 in the
   inclusive range 128..512.

   The size, in bytes, of the hexadecimal form of a hash of any given
   bit size in this enum is the bit size divided by 4.
*/
enum fsl_sha3_hash_size {
  /** Sentinel value meaning "not a valid size". Must be 0. */
  FSL_SHA3_INVALID = 0,
  FSL_SHA3_128 = 128,
  FSL_SHA3_160 = 160,
  FSL_SHA3_192 = 192,
  FSL_SHA3_224 = 224,
  FSL_SHA3_256 = 256,
  FSL_SHA3_288 = 288,
  FSL_SHA3_320 = 320,
  FSL_SHA3_352 = 352,
  FSL_SHA3_384 = 384,
  FSL_SHA3_416 = 416,
  FSL_SHA3_448 = 448,
  FSL_SHA3_480 = 480,
  FSL_SHA3_512 = 512,
  /** The default SHA3 flavor: an alias for the 256-bit size. */
  FSL_SHA3_DEFAULT = FSL_SHA3_256
};

/**
   Type for holding SHA3 processing state. Each instance must be
   initialized with fsl_sha3_init() or fsl_sha3_init2(), populated
   with fsl_sha3_update(), and "sealed" with fsl_sha3_end().

   Sample usage:

   @code
   fsl_sha3_cx cx;
   fsl_sha3_init2(&cx, FSL_SHA3_DEFAULT);
   // equivalent: fsl_sha3_init(&cx);
   fsl_sha3_update(&cx, memory, lengthOfMemory);
   fsl_sha3_end(&cx);
   printf("Hash = %s\n", (char const *)cx.hex);
   @endcode

   After fsl_sha3_end() is called cx.hex contains the hex-string form
   of the digest. Note that fsl_sha3_update() may be called an
   arbitrary number of times to feed in chunks of memory (e.g. to
   stream in arbitrarily large data).

   NOTE(review): the Keccak state is 25 x 64 bits = 1600 bits, i.e.
   the 200 bytes of u.s, yet u.x is declared with 1600 elements. That
   looks like a bits-vs-bytes mixup which over-allocates. Confirm
   against the implementation before shrinking it, as doing so
   changes sizeof(fsl_sha3_cx).
*/
struct fsl_sha3_cx {
    union {
        uint64_t s[25];         /* Keccak state. 5x5 lines of 64 bits each */
        unsigned char x[1600];  /* ... or byte-wise access to it (see
                                   the NOTE above regarding its size) */
    } u;
    unsigned nRate;        /* Bytes of input accepted per Keccak iteration */
    unsigned nLoaded;      /* Input bytes loaded into u.x[] so far this cycle */
    unsigned ixMask;       /* Insert next input into u.x[nLoaded^ixMask]. */
    enum fsl_sha3_hash_size size; /* Size of the hash, in bits. */
    unsigned char hex[132]; /* Hex form of final digest: size/4 bytes
                               (32-128 for the sizes in
                               fsl_sha3_hash_size) plus terminating
                               NUL. */
};

/**
   Maps an integer bit count to its fsl_sha3_hash_size entry. If the
   given number is a valid fsl_sha3_hash_size value, its enum entry
   is returned, else FSL_SHA3_INVALID is returned.

   @see fsl_sha3_init2()
*/
FSL_EXPORT enum fsl_sha3_hash_size fsl_sha3_hash_size_for_int(int);

/**
   Initializes a new hash.

   @param cx The context to initialize. Results are undefined if it
   is NULL.
   @param sz The size of the hash in bits. Results are undefined if
   it is not a valid positive value.

   After calling this, use fsl_sha3_update() to hash data and
   fsl_sha3_end() to finalize the hashing process and generate a
   digest.

   @see fsl_sha3_init()
*/
FSL_EXPORT void fsl_sha3_init2(fsl_sha3_cx *cx, enum fsl_sha3_hash_size sz);

/**
   Initializes cx for the default hash size: equivalent to
   fsl_sha3_init2(cx, FSL_SHA3_DEFAULT).

   @see fsl_sha3_init2()
*/
FSL_EXPORT void fsl_sha3_init(fsl_sha3_cx *cx);

/**
   Updates cx's state to include the first len bytes of data.

   @param cx The context to update. If it is NULL results are
   undefined (segfault!).
   @param data The bytes to hash. If it is not NULL then it must be
   at least len bytes long.
   @param len The number of bytes to read from data. If it is 0 then
   this function has no side-effects.

   @see fsl_sha3_init()
   @see fsl_sha3_end()
*/
FSL_EXPORT void fsl_sha3_update( fsl_sha3_cx *cx, void const *data, unsigned int len);

/**
   To be called when SHA3 hashing is complete: finishes the hash
   calculation and populates cx->hex with the final hash code in
   hexadecimal-string form.

   @param cx The context whose calculation is being completed.

   @return The binary-form digest value, which refers to cx->size/8
   bytes of memory which lives in the cx object. After this call
   cx->hex will be populated with cx->size/4 bytes of lower-case
   ASCII hex codes plus a terminating NUL byte.

   Potential TODO: change fsl_sha1_final() and fsl_md5_final() to use
   these same return semantics.

   @see fsl_sha3_init()
   @see fsl_sha3_update()
*/
FSL_EXPORT unsigned char const * fsl_sha3_end(fsl_sha3_cx *cx);


/**
   SHA3-256 counterpart of fsl_sha1_digest_to_base16().

   @param digest Must be at least 32 bytes long and hold an SHA3
   digest.
   @param zBuf Output buffer. Must be at least (FSL_STRLEN_K256+1)
   bytes long; FSL_STRLEN_K256 characters of hexadecimal-form SHA3
   hash and 1 NUL byte will be written to it.

   @see fsl_sha3_end()
*/
FSL_EXPORT void fsl_sha3_digest_to_base16(unsigned char *digest, char *zBuf);
/**
   SHA3 counterpart of fsl_sha1sum_buffer(): hashes pIn's contents
   and stores the result in pCksum.

   @see fsl_sha1sum_buffer()
*/
FSL_EXPORT int fsl_sha3sum_buffer(fsl_buffer const *pIn, fsl_buffer *pCksum);
/**
   SHA3 counterpart of fsl_sha1sum_cstr(): hashes the first len bytes
   of zIn and returns the hash as a new string.

   @see fsl_sha1sum_cstr()
*/
FSL_EXPORT char *fsl_sha3sum_cstr(const char *zIn, fsl_int_t len);
/**
   SHA3 counterpart of fsl_sha1sum_stream(): hashes all input
   consumed from src and stores the result in pCksum.

   @see fsl_sha1sum_stream()
 */
FSL_EXPORT int fsl_sha3sum_stream(fsl_input_f src, void * srcState, fsl_buffer *pCksum);
/**
   SHA3 counterpart of fsl_sha1sum_filename(): hashes the contents of
   the named file and stores the result in pCksum.

   @see fsl_sha1sum_filename()
 */
FSL_EXPORT int fsl_sha3sum_filename(const char *zFilename, fsl_buffer *pCksum);

/**
   Determines which supported hash type, if any, a hash string is.

   @param zHash Expected to be a full-length hash value of one of the
   fsl_hash_types_t-specified types.
   @param nHash The length, in bytes, of zHash's contents (which must
   be the full hash length, not a prefix).

   @return If zHash can be validated as a hash, its corresponding
   hash type, else FSL_HTYPE_ERROR.
*/
FSL_EXPORT fsl_hash_types_t fsl_validate_hash(const char *zHash, int nHash);

/**
   Verifies that a blob's content matches a given hash.

   @param pIn The buffer whose contents get hashed.
   @param zHash Expected to be a full hash (of a supported content
   hash type) of pIn's contents.
   @param nHash The length, in bytes, of zHash.

   @return If the hash of pIn's contents compares equivalent to
   zHash, the ID of the hash type. On a mismatch, FSL_HTYPE_ERROR.
*/
FSL_EXPORT fsl_hash_types_t fsl_verify_blob_hash(fsl_buffer const * pIn,
                                                 const char *zHash, int nHash);

/**
   Returns a human-readable name for the given hash type, or
   zUnknown if h is not a supported hash type.

   @param h The hash type to name.
   @param zUnknown The value to return for unsupported hash types.
 */
FSL_EXPORT const char * fsl_hash_type_name(fsl_hash_types_t h, const char *zUnknown);

#if defined(__cplusplus)
} /*extern "C"*/
#endif
#endif
/* NET_FOSSIL_SCM_FSL_HASH_H_INCLUDED */