From d0bc43a07890522c44309ea4268ed5a515343926 Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Thu, 27 Jan 2022 21:05:18 +0000 Subject: [PATCH] Don't set full-file checksums when using uncompressed sources Signed-off-by: Jonathan Dieter --- src/lib/comp/comp.c | 15 ++++++++++----- src/lib/hash/hash.c | 21 +++++++++++++++++++-- src/lib/index/index_create.c | 10 ++++++++-- src/lib/zck.c | 7 +++++++ zchunk_format.txt | 10 +++++++--- 5 files changed, 51 insertions(+), 12 deletions(-) diff --git a/src/lib/comp/comp.c b/src/lib/comp/comp.c index 1fb1db4..4786e41 100644 --- a/src/lib/comp/comp.c +++ b/src/lib/comp/comp.c @@ -481,9 +481,11 @@ ssize_t comp_read(zckCtx *zck, char *dst, size_t dst_size, bool use_dict) { &(zck->chunk_hash_type))) goto hash_error; if(zck->comp.data_loc > 0) { - if(!hash_update(zck, &(zck->check_full_hash), zck->comp.data, - zck->comp.data_loc)) - goto hash_error; + if(!zck->has_uncompressed_source) { + if(!hash_update(zck, &(zck->check_full_hash), zck->comp.data, + zck->comp.data_loc)) + goto hash_error; + } if(!hash_update(zck, &(zck->check_chunk_hash), zck->comp.data, zck->comp.data_loc)) goto hash_error; @@ -528,8 +530,11 @@ ssize_t comp_read(zckCtx *zck, char *dst, size_t dst_size, bool use_dict) { if(!hash_init(zck, &(zck->check_chunk_hash), &(zck->chunk_hash_type))) goto hash_error; - if(!hash_update(zck, &(zck->check_full_hash), src, rb) || - !hash_update(zck, &(zck->check_chunk_hash), src, rb) || + if(!zck->has_uncompressed_source) { + if(!hash_update(zck, &(zck->check_full_hash), src, rb)) + goto read_error; + } + if(!hash_update(zck, &(zck->check_chunk_hash), src, rb) || !comp_add_to_data(zck, &(zck->comp), src, rb)) goto read_error; } diff --git a/src/lib/hash/hash.c b/src/lib/hash/hash.c index 967051a..9278ec7 100644 --- a/src/lib/hash/hash.c +++ b/src/lib/hash/hash.c @@ -108,8 +108,10 @@ static int validate_checksums(zckCtx *zck, zck_log_type bad_checksums) { zck_log(ZCK_LOG_DEBUG, "No more data"); if(!hash_update(zck, 
&(zck->check_chunk_hash), buf, rsize)) return 0; - if(!hash_update(zck, &(zck->check_full_hash), buf, rsize)) - return 0; + if(!zck->has_uncompressed_source) { + if(!hash_update(zck, &(zck->check_full_hash), buf, rsize)) + return 0; + } rlen += rsize; } int valid_chunk = validate_chunk(idx, bad_checksums); @@ -411,6 +413,13 @@ int validate_current_chunk(zckCtx *zck) { int validate_file(zckCtx *zck, zck_log_type bad_checksums) { VALIDATE_BOOL(zck); + if(zck->has_uncompressed_source) { + zck_log( + ZCK_LOG_DEBUG, + "Skipping full file validation since uncompressed source flag is set" + ); + return 1; + } char *digest = hash_finalize(zck, &(zck->check_full_hash)); if(digest == NULL) { set_error(zck, "Unable to calculate full file checksum"); @@ -468,6 +477,14 @@ int validate_header(zckCtx *zck) { int ZCK_PUBLIC_API zck_validate_data_checksum(zckCtx *zck) { VALIDATE_READ_BOOL(zck); + if(zck->has_uncompressed_source) { + zck_log( + ZCK_LOG_DEBUG, + "Skipping full file validation since uncompressed source flag is set" + ); + return 1; + } + if(!seek_data(zck, zck->data_offset, SEEK_SET)) return 0; if(!hash_init(zck, &(zck->check_full_hash), &(zck->hash_type))) diff --git a/src/lib/index/index_create.c b/src/lib/index/index_create.c index 1a23517..200f457 100644 --- a/src/lib/index/index_create.c +++ b/src/lib/index/index_create.c @@ -103,6 +103,10 @@ bool index_create(zckCtx *zck) { if(zck->full_hash_digest == NULL) return false; + /* Set hash to 0s if has_uncompressed_source is set */ + if(zck->has_uncompressed_source) + memset(zck->full_hash_digest, 0, zck->hash_type.digest_size); + /* Set initial malloc size */ index_malloc = MAX_COMP_SIZE * 2; @@ -197,8 +201,10 @@ bool index_add_to_chunk(zckCtx *zck, char *data, size_t comp_size, if(comp_size == 0) return true; - if(!hash_update(zck, &(zck->full_hash), data, comp_size)) - return false; + if(!zck->has_uncompressed_source) { + if(!hash_update(zck, &(zck->full_hash), data, comp_size)) + return false; + } 
if(!hash_update(zck, &(zck->work_index_hash), data, comp_size)) return false; diff --git a/src/lib/zck.c b/src/lib/zck.c index 15a672e..61f92d8 100644 --- a/src/lib/zck.c +++ b/src/lib/zck.c @@ -343,6 +343,13 @@ bool ZCK_PUBLIC_API zck_set_ioption(zckCtx *zck, zck_ioption option, ssize_t val } else if(option == ZCK_UNCOMP_HEADER) { zck->has_uncompressed_source = 1; + /* Uncompressed source requires chunk checksums to be a minimum of SHA-256 */ + if(zck->chunk_hash_type.type == ZCK_HASH_SHA1 || + zck->chunk_hash_type.type == ZCK_HASH_SHA512_128) { + if(!set_chunk_hash_type(zck, ZCK_HASH_SHA256)) + return false; + } + /* Hash options */ } else if(option < 100) { /* Currently no hash options other than setting hash type, so bail */ diff --git a/zchunk_format.txt b/zchunk_format.txt index 5aa790f..ecaf278 100644 --- a/zchunk_format.txt +++ b/zchunk_format.txt @@ -36,6 +36,11 @@ Checksum type 0 = SHA-1 1 = SHA-256 + Note: if the file has flag 2 (uncompressed source) set, the total data + checksum must not be checked and should not be generated. Also, the + chunk checksum must not be SHA-1 or SHA-512/128, since there is no total + data checksum. + Header size: This is an integer containing the size of the header, not including the lead @@ -74,7 +79,7 @@ Flags Current flags are: bit 0: File has data streams bit 1: File has optional elements - bit 2: EXPERIMENTAL: File may be applied against an uncompressed source + bit 2: File may be applied against an uncompressed source Compression type This is an integer containing the type of compression used to compress dict and @@ -174,8 +179,7 @@ Chunk checksum This is the checksum of the compressed chunk, used to detect whether any two chunks are identical. 
-EXPERIMENTAL: NOTE: Uncompressed chunk checksum will only exist if flag 2 is set - to 1 +NOTE: Uncompressed chunk checksum is only present if bit 2 of the flags is set Uncompressed chunk checksum This is the checksum of the uncompressed chunk, used to detect whether a chunk from an uncompressed source is identical to the compressed chunk -- 2.30.2