Don't set full-file checksums when using uncompressed sources
author Jonathan Dieter <jdieter@gmail.com>
Thu, 27 Jan 2022 21:05:18 +0000 (21:05 +0000)
committer Jonathan Dieter <jdieter@gmail.com>
Sat, 5 Feb 2022 17:04:18 +0000 (17:04 +0000)
Signed-off-by: Jonathan Dieter <jdieter@gmail.com>
src/lib/comp/comp.c
src/lib/hash/hash.c
src/lib/index/index_create.c
src/lib/zck.c
zchunk_format.txt

index 1fb1db46f6620624ff7d7722775bf14e076acf80..4786e410cfe302eaeeec9917bd8853d94f6264e6 100644 (file)
@@ -481,9 +481,11 @@ ssize_t comp_read(zckCtx *zck, char *dst, size_t dst_size, bool use_dict) {
                           &(zck->chunk_hash_type)))
                 goto hash_error;
             if(zck->comp.data_loc > 0) {
-                if(!hash_update(zck, &(zck->check_full_hash), zck->comp.data,
-                                zck->comp.data_loc))
-                    goto hash_error;
+                if(!zck->has_uncompressed_source) {
+                    if(!hash_update(zck, &(zck->check_full_hash), zck->comp.data,
+                                    zck->comp.data_loc))
+                        goto hash_error;
+                }
                 if(!hash_update(zck, &(zck->check_chunk_hash), zck->comp.data,
                                 zck->comp.data_loc))
                     goto hash_error;
@@ -528,8 +530,11 @@ ssize_t comp_read(zckCtx *zck, char *dst, size_t dst_size, bool use_dict) {
             if(!hash_init(zck, &(zck->check_chunk_hash),
                           &(zck->chunk_hash_type)))
                 goto hash_error;
-        if(!hash_update(zck, &(zck->check_full_hash), src, rb) ||
-           !hash_update(zck, &(zck->check_chunk_hash), src, rb) ||
+        if(!zck->has_uncompressed_source) {
+            if(!hash_update(zck, &(zck->check_full_hash), src, rb))
+                goto read_error;
+        }
+        if(!hash_update(zck, &(zck->check_chunk_hash), src, rb) ||
            !comp_add_to_data(zck, &(zck->comp), src, rb))
             goto read_error;
     }
index 967051a4da5ba295caf874c28ced44b2e61fbbcc..9278ec7f58164ca87052bba19e3ba00aac7a3f03 100644 (file)
@@ -108,8 +108,10 @@ static int validate_checksums(zckCtx *zck, zck_log_type bad_checksums) {
                 zck_log(ZCK_LOG_DEBUG, "No more data");
             if(!hash_update(zck, &(zck->check_chunk_hash), buf, rsize))
                 return 0;
-            if(!hash_update(zck, &(zck->check_full_hash), buf, rsize))
-                return 0;
+            if(!zck->has_uncompressed_source) {
+                if(!hash_update(zck, &(zck->check_full_hash), buf, rsize))
+                    return 0;
+            }
             rlen += rsize;
         }
         int valid_chunk = validate_chunk(idx, bad_checksums);
@@ -411,6 +413,13 @@ int validate_current_chunk(zckCtx *zck) {
 
 int validate_file(zckCtx *zck, zck_log_type bad_checksums) {
     VALIDATE_BOOL(zck);
+    if(zck->has_uncompressed_source) {
+        zck_log(
+            ZCK_LOG_DEBUG,
+            "Skipping full file validation since uncompressed source flag is set"
+        );
+        return 1;
+    }
     char *digest = hash_finalize(zck, &(zck->check_full_hash));
     if(digest == NULL) {
         set_error(zck, "Unable to calculate full file checksum");
@@ -468,6 +477,14 @@ int validate_header(zckCtx *zck) {
 int ZCK_PUBLIC_API zck_validate_data_checksum(zckCtx *zck) {
     VALIDATE_READ_BOOL(zck);
 
+    if(zck->has_uncompressed_source) {
+        zck_log(
+            ZCK_LOG_DEBUG,
+            "Skipping full file validation since uncompressed source flag is set"
+        );
+        return 1;
+    }
+
     if(!seek_data(zck, zck->data_offset, SEEK_SET))
         return 0;
     if(!hash_init(zck, &(zck->check_full_hash), &(zck->hash_type)))
index 1a23517690950c0e51131ef721b7aa0a471b8c90..200f4577ea4085bbefdd37caf703dd1070b18dc5 100644 (file)
@@ -103,6 +103,10 @@ bool index_create(zckCtx *zck) {
     if(zck->full_hash_digest == NULL)
         return false;
 
+    /* Set hash to 0s if has_uncompressed_source is set */
+    if(zck->has_uncompressed_source)
+        memset(zck->full_hash_digest, 0, zck->hash_type.digest_size);
+
     /* Set initial malloc size */
     index_malloc  = MAX_COMP_SIZE * 2;
 
@@ -197,8 +201,10 @@ bool index_add_to_chunk(zckCtx *zck, char *data, size_t comp_size,
     if(comp_size == 0)
         return true;
 
-    if(!hash_update(zck, &(zck->full_hash), data, comp_size))
-        return false;
+    if(!zck->has_uncompressed_source) {
+        if(!hash_update(zck, &(zck->full_hash), data, comp_size))
+            return false;
+    }
     if(!hash_update(zck, &(zck->work_index_hash), data, comp_size))
         return false;
 
index 15a672e66a6a628bf22ca7c69b8afb2343c0753e..61f92d8c7967e7d73971ec402c0998cc60a13b6d 100644 (file)
@@ -343,6 +343,13 @@ bool ZCK_PUBLIC_API zck_set_ioption(zckCtx *zck, zck_ioption option, ssize_t val
 
     } else if(option == ZCK_UNCOMP_HEADER) {
         zck->has_uncompressed_source = 1;
+        /* Uncompressed source requires chunk checksums to be a minimum of SHA-256 */
+        if(zck->chunk_hash_type.type == ZCK_HASH_SHA1 ||
+           zck->chunk_hash_type.type == ZCK_HASH_SHA512_128) {
+            if(!set_chunk_hash_type(zck, ZCK_HASH_SHA256))
+                return false;
+        }
+
     /* Hash options */
     } else if(option < 100) {
         /* Currently no hash options other than setting hash type, so bail */
index 5aa790f1c71a2729ddd8411ff3471cda580c4af1..ecaf2784a5570f884f6046c5a16069bddb798be6 100644 (file)
@@ -36,6 +36,11 @@ Checksum type
    0 = SHA-1
    1 = SHA-256
 
+ Note: if the file has flag bit 2 (uncompressed source) set, the total data
+       checksum must not be checked and should not be generated.  Also, since
+       there is then no total data checksum to fall back on, the chunk checksum
+       must not be SHA-1 or SHA-512/128.
+
 Header size:
  This is an integer containing the size of the header, not including the lead
 
@@ -74,7 +79,7 @@ Flags
  Current flags are:
   bit 0: File has data streams
   bit 1: File has optional elements
-  bit 2: EXPERIMENTAL: File may be applied against an uncompressed source
+  bit 2: File may be applied against an uncompressed source
 
 Compression type
  This is an integer containing the type of compression used to compress dict and
@@ -174,8 +179,7 @@ Chunk checksum
  This is the checksum of the compressed chunk, used to detect whether any two
  chunks are identical.
 
-EXPERIMENTAL: NOTE: Uncompressed chunk checksum will only exist if flag 2 is set
-                    to 1
+NOTE: Uncompressed chunk checksum will only exist if flag 2 is set to 1
 Uncompressed chunk checksum
  This is the checksum of the uncompressed chunk, used to detect whether a chunk
  from an uncompressed source is identical to the compressed chunk