/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=2 et sw=2 tw=80: */ #if !defined(NET_FOSSIL_SCM_FSL_HASH_H_INCLUDED) #define NET_FOSSIL_SCM_FSL_HASH_H_INCLUDED /* Copyright 2013-2021 Stephan Beal (https://wanderinghorse.net). Derived heavily from previous work: Copyright (c) 2013 D. Richard Hipp (https://www.hwaci.com/drh/) This program is free software; you can redistribute it and/or modify it under the terms of the Simplified BSD License (also known as the "2-Clause License" or "FreeBSD License".) This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. ***************************************************************************** This file declares public APIs relating to generating hash values hashing. */ #include "fossil-util.h" /* MUST come first b/c of config macros */ #if !defined(FSL_SHA1_HARDENED) # define FSL_SHA1_HARDENED 1 #endif #if defined(__cplusplus) extern "C" { #endif /** Various set-in-stone constants used by the API. */ enum fsl_hash_constants { /** The length, in bytes, of fossil's hex-form SHA1 UUID strings. */ FSL_STRLEN_SHA1 = 40, /** The length, in bytes, of fossil's hex-form SHA3-256 UUID strings. */ FSL_STRLEN_K256 = 64, /** The length, in bytes, of a hex-form MD5 hash. */ FSL_STRLEN_MD5 = 32, /** Minimum length of a full UUID. */ FSL_UUID_STRLEN_MIN = FSL_STRLEN_SHA1, /** Maximum length of a full UUID. */ FSL_UUID_STRLEN_MAX = FSL_STRLEN_K256 }; /** Unique IDs for artifact hash types the library works. */ enum fsl_hash_types_t { /** Invalid hash type. */ FSL_HTYPE_ERROR = 0, /** SHA1. */ FSL_HTYPE_SHA1 = 1, /** SHA3-256. */ FSL_HTYPE_K256 = 2 }; typedef enum fsl_hash_types_t fsl_hash_types_t; typedef struct fsl_md5_cx fsl_md5_cx; typedef struct fsl_sha1_cx fsl_sha1_cx; typedef struct fsl_sha3_cx fsl_sha3_cx; /** The hash string of the initial MD5 state. Used as an optimization for some places where we need an MD5 but know it will not hash any data. Equivalent to what the md5sum command outputs for empty input: @code # md5sum < /dev/null d41d8cd98f00b204e9800998ecf8427e - @endcode */ #define FSL_MD5_INITIAL_HASH "d41d8cd98f00b204e9800998ecf8427e" /** Holds state for MD5 calculations. It is intended to be used like this: @code unsigned char digest[16]; char hex[FSL_STRLEN_MD5+1]; fsl_md5_cx cx = fsl_md5_cx_empty; // alternately: fsl_md5_init(&cx); ...call fsl_md5_update(&cx,...) any number of times to ...incrementally calculate the hash. fsl_md5_final(&cx, digest); // ends the calculation fsl_md5_digest_to_base16(digest, hex); // digest now contains the raw 16-byte MD5 digest. // hex now contains the 32-byte MD5 + a trailing NUL @endcode */ struct fsl_md5_cx { int isInit; uint32_t buf[4]; uint32_t bits[2]; unsigned char in[64]; }; #define fsl_md5_cx_empty_m { \ 1/*isInit*/, \ {/*buf*/0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 }, \ {/*bits*/0,0}, \ {0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, \ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, \ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, \ 0,0,0,0}} /** A fsl_md5_cx instance which holds the initial state used for md5 calculations. Instances must either be copy-initialized from this instance or they must be passed to fsl_md5_init() before they are used. */ FSL_EXPORT const fsl_md5_cx fsl_md5_cx_empty; /** Initializes the given context pointer. It must not be NULL. This must be the first routine called on any fsl_md5_cx instances. Alternately, copy-constructing fsl_md5_cx_empty has the same effect. @see fsl_md5_update() @see fsl_md5_final() */ FSL_EXPORT void fsl_md5_init(fsl_md5_cx *cx); /** Updates cx's state to reflect the addition of the data specified by the range (buf, buf+len]. Neither cx nor buf may be NULL. This may be called an arbitrary number of times between fsl_md5_init() and fsl_md5_final(). @see fsl_md5_init() @see fsl_md5_final() */ FSL_EXPORT void fsl_md5_update(fsl_md5_cx *cx, void const * buf, fsl_size_t len); /** Finishes up the calculation of the md5 for the given context and writes a 16-byte digest value to the 2nd parameter. Use fsl_md5_digest_to_base16() to convert the digest output value to hexidecimal form. @see fsl_md5_init() @see fsl_md5_update() @see fsl_md5_digest_to_base16() */ FSL_EXPORT void fsl_md5_final(fsl_md5_cx * cx, unsigned char * digest); /** Converts an md5 digest value (from fsl_md5_final()'s 2nd parameter) to a 32-byte (FSL_STRLEN_MD5) CRC string plus a terminating NUL byte. i.e. zBuf must be at least (FSL_STRLEN_MD5+1) bytes long. @see fsl_md5_final() */ FSL_EXPORT void fsl_md5_digest_to_base16(unsigned char *digest, char *zBuf); /** The md5 counterpart of fsl_sha1sum_buffer(), identical in semantics except that its result is an MD5 hash instead of an SHA1 hash and the resulting hex string is FSL_STRLEN_MD5 bytes long plus a terminating NUL. */ FSL_EXPORT int fsl_md5sum_buffer(fsl_buffer const *pIn, fsl_buffer *pCksum); /** The md5 counterpart of fsl_sha1sum_cstr(), identical in semantics except that its result is an MD5 hash instead of an SHA1 hash and the resulting string is FSL_STRLEN_MD5 bytes long plus a terminating NUL. */ FSL_EXPORT char *fsl_md5sum_cstr(const char *zIn, fsl_int_t len); /** The MD5 counter part to fsl_sha1sum_stream(), with identical semantics except that the generated hash is an MD5 string instead of SHA1. */ FSL_EXPORT int fsl_md5sum_stream(fsl_input_f src, void * srcState, fsl_buffer *pCksum); /** Reads all input from src() and passes it through fsl_md5_update(cx,...). Returns 0 on success, FSL_RC_MISUSE if !cx or !src. If src returns a non-0 code, that code is returned from here. */ FSL_EXPORT int fsl_md5_update_stream(fsl_md5_cx *cx, fsl_input_f src, void * srcState); /** Equivalent to fsl_md5_update(cx, b->mem, b->used). Results are undefined if either pointer is invalid or NULL. */ FSL_EXPORT void fsl_md5_update_buffer(fsl_md5_cx *cx, fsl_buffer const * b); /** Passes the first len bytes of str to fsl_md5_update(cx). If len is less than 0 then fsl_strlen() is used to calculate the length. Results are undefined if either pointer is invalid or NULL. This is a no-op if !len or (len<0 && !*str). */ FSL_EXPORT void fsl_md5_update_cstr(fsl_md5_cx *cx, char const * str, fsl_int_t len); /** A fsl_md5_update_stream() proxy which updates cx to include the contents of the given file. */ FSL_EXPORT int fsl_md5_update_filename(fsl_md5_cx *cx, char const * fname); /** The MD5 counter part to fsl_sha1sum_filename(), with identical semantics except that the generated hash is an MD5 string instead of SHA1. */ FSL_EXPORT int fsl_md5sum_filename(const char *zFilename, fsl_buffer *pCksum); #if FSL_SHA1_HARDENED typedef void(*fsl_sha1h_collision_callback)(uint64_t, const uint32_t*, const uint32_t*, const uint32_t*, const uint32_t*); #endif /** Holds state for SHA1 calculations. It is intended to be used like this: @code unsigned char digest[20] char hex[FSL_STRLEN_SHA1+1]; fsl_sha1_cx cx = fsl_sha1_cx_empty; // alternately: fsl_sha1_init(&cx) ...call fsl_sha1_update(&cx,...) any number of times to ...incrementally calculate the hash. fsl_sha1_final(&cx, digest); // ends the calculation fsl_sha1_digest_to_base16(digest, hex); // digest now contains the raw 20-byte SHA1 digest. // hex now contains the 40-byte SHA1 + a trailing NUL @endcode */ struct fsl_sha1_cx { #if FSL_SHA1_HARDENED uint64_t total; uint32_t ihv[5]; unsigned char buffer[64]; int bigendian; int found_collision; int safe_hash; int detect_coll; int ubc_check; int reduced_round_coll; fsl_sha1h_collision_callback callback; uint32_t ihv1[5]; uint32_t ihv2[5]; uint32_t m1[80]; uint32_t m2[80]; uint32_t states[80][5]; #else unsigned int state[5]; unsigned int count[2]; unsigned char buffer[64]; #endif }; /** fsl_sha1_cx instance intended for in-struct copy initialization. */ #if FSL_SHA1_HARDENED #define fsl_sha1_cx_empty_m {0} #else #define fsl_sha1_cx_empty_m { \ {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 }, \ {0,0}, \ {0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, \ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0 \ } \ } #endif /** fsl_sha1_cx instance intended for copy initialization. For build config portability, the object must still be passed to fsl_sha1_init() to initialize it. */ FSL_EXPORT const fsl_sha1_cx fsl_sha1_cx_empty; /** Initializes the given context with the initial SHA1 state. This must be the first routine called on an SHA1 context, and passing this context to other SHA1 routines without first having passed it to this will lead to undefined results. @see fsl_sha1_update() @see fsl_sha1_final() */ FSL_EXPORT void fsl_sha1_init(fsl_sha1_cx *context); /** Updates the given context to include the hash of the first len bytes of the given data. @see fsl_sha1_init() @see fsl_sha1_final() */ FSL_EXPORT void fsl_sha1_update( fsl_sha1_cx *context, void const *data, fsl_size_t len); /** Add padding and finalizes the message digest. If digest is not NULL then it writes 20 bytes of digest to the 2nd parameter. If this library is configured with hardened SHA1 hashes, this function returns non-0 if a collision was detected while hashing. If it is not configured for hardened SHA1, or no collision was detected, it returns 0. @see fsl_sha1_update() @see fsl_sha1_digest_to_base16() */ FSL_EXPORT int fsl_sha1_final(fsl_sha1_cx *context, unsigned char * digest); /** A convenience form of fsl_sha1_final() which writes FSL_STRLEN_SHA1+1 bytes (hash plus terminating NUL byte) to the 2nd argument and returns a (const char *)-type cast of the 2nd argument. */ FSL_EXPORT const char * fsl_sha1_final_hex(fsl_sha1_cx *context, char * zHex); /** Convert a digest into base-16. digest must be at least 20 bytes long and hold an SHA1 digest. zBuf must be at least (FSL_STRLEN_SHA1 + 1) bytes long, to which FSL_STRLEN_SHA1 characters of hexidecimal-form SHA1 hash and 1 NUL byte will be written. @see fsl_sha1_final() */ FSL_EXPORT void fsl_sha1_digest_to_base16(unsigned char *digest, char *zBuf); /** Computes the SHA1 checksum of pIn and stores the resulting checksum in the buffer pCksum. pCksum's memory is re-used if is has any allocated to it. pCksum may == pIn, in which case this is a destructive operation (replacing the hashed data with its hash code). Return 0 on success, FSL_RC_OOM if (re)allocating pCksum fails. */ FSL_EXPORT int fsl_sha1sum_buffer(fsl_buffer const *pIn, fsl_buffer *pCksum); /** Computes the SHA1 checksum of the first len bytes of the given string. If len is negative then zInt must be NUL-terminated and fsl_strlen() is used to find its length. The result is a FSL_UUID_STRLEN-byte string (+NUL byte) returned in memory obtained from fsl_malloc(), so it must be passed to fsl_free() to free it. If NULL==zIn or !len then NULL is returned. */ FSL_EXPORT char *fsl_sha1sum_cstr(const char *zIn, fsl_int_t len); /** Consumes all input from src and calculates its SHA1 hash. The result is set in pCksum (its contents, if any, are overwritten, not appended to). Returns 0 on success. Returns FSL_RC_MISUSE if !src or !pCksum. It keeps consuming input from src() until that function reads fewer bytes than requested, at which point EOF is assumed. If src() returns non-0, that code is returned from this function. */ FSL_EXPORT int fsl_sha1sum_stream(fsl_input_f src, void * srcState, fsl_buffer *pCksum); /** A fsl_sha1sum_stream() wrapper which calculates the SHA1 of given file. Returns FSL_RC_IO if the file cannot be opened, FSL_RC_MISUSE if !zFilename or !pCksum, else as per fsl_sha1sum_stream(). TODO: the v1 impl has special behaviour for symlinks which this function lacks. For that support we need a variant of this function which takes a fsl_cx parameter (for the allow-symlinks setting). */ FSL_EXPORT int fsl_sha1sum_filename(const char *zFilename, fsl_buffer *pCksum); /** Legal values for SHA3 hash sizes, in bits: an increment of 32 bits in the inclusive range (128..512). The hexidecimal-code size, in bytes, of any given bit size in this enum is the bit size/4. */ enum fsl_sha3_hash_size { /** Sentinel value. Must be 0. */ FSL_SHA3_INVALID = 0, FSL_SHA3_128 = 128, FSL_SHA3_160 = 160, FSL_SHA3_192 = 192, FSL_SHA3_224 = 224, FSL_SHA3_256 = 256, FSL_SHA3_288 = 288, FSL_SHA3_320 = 320, FSL_SHA3_352 = 352, FSL_SHA3_384 = 384, FSL_SHA3_416 = 416, FSL_SHA3_448 = 448, FSL_SHA3_480 = 480, FSL_SHA3_512 = 512, /* Default SHA3 flavor */ FSL_SHA3_DEFAULT = 256 }; /** Type for holding SHA3 processing state. Each instance must be initialized with fsl_sha3_init(), populated with fsl_sha3_update(), and "sealed" with fsl_sha3_end(). Sample usage: @code fsl_sha3_cx cx; fsl_sha3_init(&cx, FSL_SHA3_DEFAULT); fsl_sha3_update(&cx, memory, lengthOfMemory); fsl_sha3_end(&cx); printf("Hash = %s\n", (char const *)cx.hex); @endcode After fsl_sha3_end() is called cx.hex contains the hex-string forms of the digest. Note that fsl_sha3_update() may be called an arbitrary number of times to feed in chunks of memory (e.g. to stream in arbitrarily large data). */ struct fsl_sha3_cx { union { uint64_t s[25]; /* Keccak state. 5x5 lines of 64 bits each */ unsigned char x[1600]; /* ... or 1600 bytes */ } u; unsigned nRate; /* Bytes of input accepted per Keccak iteration */ unsigned nLoaded; /* Input bytes loaded into u.x[] so far this cycle */ unsigned ixMask; /* Insert next input into u.x[nLoaded^ixMask]. */ enum fsl_sha3_hash_size size; /* Size of the hash, in bits. */ unsigned char hex[132]; /* Hex form of final digest: 56-128 bytes plus terminating NUL. */ }; /** If the given number is a valid fsl_sha3_hash_size value, its enum entry is returned, else FSL_SHA3_INVALID is returned. @see fsl_sha3_init() */ FSL_EXPORT enum fsl_sha3_hash_size fsl_sha3_hash_size_for_int(int); /** Initialize a new hash. The second argument specifies the size of the hash in bits. Results are undefined if cx is NULL or sz is not a valid positive value. After calling this, use fsl_sha3_update() to hash data and fsl_sha3_end() to finalize the hashing process and generate a digest. */ FSL_EXPORT void fsl_sha3_init2(fsl_sha3_cx *cx, enum fsl_sha3_hash_size sz); /** Equivalent to fsl_sha3_init2(cx, FSL_SHA3_DEFAULT). */ FSL_EXPORT void fsl_sha3_init(fsl_sha3_cx *cx); /** Updates cx's state to include the first len bytes of data. If cx is NULL results are undefined (segfault!). If mem is not NULL then it must be at least n bytes long. If n is 0 then this function has no side-effects. @see fsl_sha3_init() @see fsl_sha3_end() */ FSL_EXPORT void fsl_sha3_update( fsl_sha3_cx *cx, void const *data, unsigned int len); /** To be called when SHA3 hashing is complete: finishes the hash calculation and populates cx->hex with the final hash code in hexidecimal-string form. Returns the binary-form digest value, which refers to cx->size/8 bytes of memory which lives in the cx object. After this call cx->hex will be populated with cx->size/4 bytes of lower-case ASCII hex codes plus a terminating NUL byte. Potential TODO: change fsl_sha1_final() and fsl_md5_final() to use these same return semantics. @see fsl_sha3_init() @see fsl_sha3_update() */ FSL_EXPORT unsigned char const * fsl_sha3_end(fsl_sha3_cx *cx); /** SHA3-256 counterpart of fsl_sha1_digest_to_base16(). digest must be at least 32 bytes long and hold an SHA3 digest. zBuf must be at least (FSL_STRLEN_K256+1) bytes long, to which FSL_STRLEN_K256 characters of hexidecimal-form SHA3 hash and 1 NUL byte will be written @see fsl_sha3_end(). */ FSL_EXPORT void fsl_sha3_digest_to_base16(unsigned char *digest, char *zBuf); /** SHA3 counter part of fsl_sha1sum_buffer(). */ FSL_EXPORT int fsl_sha3sum_buffer(fsl_buffer const *pIn, fsl_buffer *pCksum); /** SHA3 counter part of fsl_sha1sum_cstr(). */ FSL_EXPORT char *fsl_sha3sum_cstr(const char *zIn, fsl_int_t len); /** SHA3 counterpart of fsl_sha1sum_stream(). */ FSL_EXPORT int fsl_sha3sum_stream(fsl_input_f src, void * srcState, fsl_buffer *pCksum); /** SHA3 counterpart of fsl_sha1sum_filename(). */ FSL_EXPORT int fsl_sha3sum_filename(const char *zFilename, fsl_buffer *pCksum); /** Expects zHash to be a full-length hash value of one of the fsl_hash_types_t-specified types, and nHash to be the length, in bytes, of zHash's contents (which must be the full hash length, not a prefix). If zHash can be validated as a hash, its corresponding hash type is returned, else FSL_HTYPE_ERROR is returned. */ FSL_EXPORT fsl_hash_types_t fsl_validate_hash(const char *zHash, int nHash); /** Expects (zHash, nHash) to refer to a full hash (of a supported content hash type) of pIn's contents. This routine hashes pIn's contents and, if it compares equivalent to zHash then the ID of the hash type is returned. On a mismatch, FSL_HTYPE_ERROR is returned. */ FSL_EXPORT fsl_hash_types_t fsl_verify_blob_hash(fsl_buffer const * pIn, const char *zHash, int nHash); /** Returns a human-readable name for the given hash type, or its second argument h is not a supported hash type. */ FSL_EXPORT const char * fsl_hash_type_name(fsl_hash_types_t h, const char *zUnknown); #if defined(__cplusplus) } /*extern "C"*/ #endif #endif /* NET_FOSSIL_SCM_FSL_HASH_H_INCLUDED */