From 8bf00ad280e68e4e93941be8ddbd652b412c2565 Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Sat, 5 Feb 2022 17:02:13 +0000 Subject: [PATCH] Add support for detached headers Signed-off-by: Jonathan Dieter --- include/zck.h.in | 3 ++ src/lib/hash/hash.c | 41 +++++++++++++-------- src/lib/header.c | 26 +++++++++++-- src/lib/log.c | 2 +- src/lib/zck_private.h | 2 +- src/unzck.c | 86 ++++++++++++++++++++++++++++++++++++++----- src/zck_read_header.c | 6 +++ zchunk_format.txt | 30 ++++++++++----- 8 files changed, 157 insertions(+), 39 deletions(-) diff --git a/include/zck.h.in b/include/zck.h.in index 7bf320b..93f4706 100644 --- a/include/zck.h.in +++ b/include/zck.h.in @@ -226,6 +226,9 @@ char ZCK_PUBLIC_API *zck_get_header_digest(zckCtx *zck) /* Get data digest */ char ZCK_PUBLIC_API *zck_get_data_digest(zckCtx *zck) ZCK_WARN_UNUSED; +/* Get whether this context is pointing to a detached header */ +bool ZCK_PUBLIC_API zck_is_detached_header(zckCtx *zck) + ZCK_WARN_UNUSED; /******************************************************************* diff --git a/src/lib/hash/hash.c b/src/lib/hash/hash.c index 9278ec7..f0ebc6f 100644 --- a/src/lib/hash/hash.c +++ b/src/lib/hash/hash.c @@ -93,6 +93,8 @@ static int validate_checksums(zckCtx *zck, zck_log_type bad_checksums) { for(zckChunk *idx = zck->index.first; idx; idx = idx->next) { if(idx == zck->index.first && idx->length == 0) { idx->valid = 1; + if(zck->header_only) + break; continue; } @@ -120,18 +122,28 @@ static int validate_checksums(zckCtx *zck, zck_log_type bad_checksums) { idx->valid = valid_chunk; if(all_good && valid_chunk != 1) all_good = false; + if(zck->header_only) + break; } int valid_file = -1; - if(all_good) { - /* Check data checksum */ - valid_file = validate_file(zck, bad_checksums); - if(!valid_file) - return 0; + if(zck->has_uncompressed_source || zck->header_only) { + /* If we have an uncompressed source or are a detached header, + * skip meaningless full data checksum, and just set valid_file + 
* if the chunks (or dictionary, if we're a header) was good */ + if(all_good) + valid_file = 1; + } else { + if(all_good) { + /* Check data checksum */ + valid_file = validate_file(zck, bad_checksums); + if(!valid_file) + return 0; - /* If data checksum failed, invalidate *all* chunks */ - if(valid_file == -1) - for(zckChunk *idx = zck->index.first; idx; idx = idx->next) - idx->valid = -1; + /* If data checksum failed, invalidate *all* chunks */ + if(valid_file == -1) + for(zckChunk *idx = zck->index.first; idx; idx = idx->next) + idx->valid = -1; + } } /* Go back to beginning of data section */ @@ -473,16 +485,15 @@ int validate_header(zckCtx *zck) { return 1; } -/* Returns 1 if data hash matches, -1 if it doesn't and 0 if error */ +/* Returns 1 if data hash matches, -1 if it doesn't and 0 if error + * + * For a zchunk file with both compressed and uncompressed checksums, validate + * each chunk checksum independently, since there is no data hash */ int ZCK_PUBLIC_API zck_validate_data_checksum(zckCtx *zck) { VALIDATE_READ_BOOL(zck); if(zck->has_uncompressed_source) { - zck_log( - ZCK_LOG_DEBUG, - "Skipping full file validation since uncompressed source flag is set" - ); - return 1; + return validate_checksums(zck, ZCK_LOG_WARNING); } if(!seek_data(zck, zck->data_offset, SEEK_SET)) diff --git a/src/lib/header.c b/src/lib/header.c index 730e663..16ea3e8 100644 --- a/src/lib/header.c +++ b/src/lib/header.c @@ -74,6 +74,12 @@ static bool read_optional_element(zckCtx *zck, size_t id, size_t data_size, } static bool read_header_from_file(zckCtx *zck) { + /* Verify that lead_size and header_length have been set */ + if(zck->lead_size == 0 || zck->header_length == 0) { + set_error(zck, "Lead and header sizes are both 0. 
Have you run zck_read_lead() yet?"); + return false; + } + /* Allocate header and store any extra bytes at beginning of header */ zck->header = zrealloc(zck->header, zck->lead_size + zck->header_length); if (!zck->header) { @@ -102,9 +108,16 @@ static bool read_header_from_file(zckCtx *zck) { if(!hash_init(zck, &(zck->check_full_hash), &(zck->hash_type))) return false; - if(!hash_update(zck, &(zck->check_full_hash), zck->header, - zck->hdr_digest_loc)) + /* If we're reading a detached zchunk header, first five bytes will be + * different, breaking the header digest, so let's make things simple + * by forcing the first five bytes to be static */ + if(!hash_update(zck, &(zck->check_full_hash), "\0ZCK1", 5)) return false; + /* Now hash the remaining lead */ + if(!hash_update(zck, &(zck->check_full_hash), zck->header+5, + zck->hdr_digest_loc-5)) + return false; + /* And the remaining header */ if(!hash_update(zck, &(zck->check_full_hash), header, zck->header_length)) return false; int ret = validate_header(zck); @@ -472,7 +485,9 @@ static bool read_lead(zckCtx *zck) { return false; } - if(memcmp(header, "\0ZCK1", 5) != 0) { + if(memcmp(header, "\0ZHR1", 5) == 0) { + zck->header_only = true; + } else if(memcmp(header, "\0ZCK1", 5) != 0) { free(header); set_error(zck, "Invalid lead, perhaps this is not a zck file?"); return false; @@ -652,3 +667,8 @@ ssize_t ZCK_PUBLIC_API zck_get_flags(zckCtx *zck) { VALIDATE_INT(zck); return get_flags(zck); } + +bool ZCK_PUBLIC_API zck_is_detached_header(zckCtx *zck) { + VALIDATE_BOOL(zck); + return zck->header_only; +} diff --git a/src/lib/log.c b/src/lib/log.c index 42119f7..637e678 100644 --- a/src/lib/log.c +++ b/src/lib/log.c @@ -40,7 +40,7 @@ static int log_fd = 2; static int log_fd = STDERR_FILENO; #endif -static logcallback callback = NULL; +static logcallback callback = NULL; void ZCK_PUBLIC_API zck_set_log_level(zck_log_type ll) { log_level = ll; diff --git a/src/lib/zck_private.h b/src/lib/zck_private.h index 
77a8c5f..e91cf90 100644 --- a/src/lib/zck_private.h +++ b/src/lib/zck_private.h @@ -245,6 +245,7 @@ struct zckCtx { size_t data_offset; size_t header_length; + bool header_only; char *header; size_t header_size; size_t hdr_digest_loc; @@ -257,7 +258,6 @@ struct zckCtx { char *sig_string; size_t sig_size; - char *prep_digest; int prep_hash_type; ssize_t prep_hdr_size; diff --git a/src/unzck.c b/src/unzck.c index fd4af6f..6b992cc 100644 --- a/src/unzck.c +++ b/src/unzck.c @@ -53,7 +53,8 @@ static struct argp_option options[] = { {"verbose", 'v', 0, 0, "Increase verbosity (can be specified more than once for debugging)"}, {"stdout", 'c', 0, 0, "Direct output to stdout"}, - {"dict", 1000, 0, 0, "Only extract the dictionary"}, + {"dict", 1000, 0, 0, "Only extract the dictionary (can't be run with --header)"}, + {"header", 1001, 0, 0, "Only extract the header (can't be run with --dict)"}, {"version", 'V', 0, 0, "Show program version"}, { 0 } }; @@ -62,6 +63,7 @@ struct arguments { char *args[1]; zck_log_type log_level; bool dict; + bool header; bool std_out; bool exit; }; @@ -85,8 +87,13 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state) { version(); arguments->exit = true; break; - case 1000: + case 1000: // Header and dict can't both be set arguments->dict = true; + arguments->header = false; + break; + case 1001: // Header and dict can't both be set + arguments->header = true; + arguments->dict = false; break; case ARGP_KEY_ARG: if (state->arg_num >= 1) { @@ -126,8 +133,9 @@ int main (int argc, char *argv[]) { if(!arguments.std_out) { if(strlen(arguments.args[0]) < 5 || - strcmp(arguments.args[0] + strlen(arguments.args[0]) - 4, ".zck") != 0) { - LOG_ERROR("Not a *.zck file: %s\n", arguments.args[0]); + (strcmp(arguments.args[0] + strlen(arguments.args[0]) - 4, ".zck") != 0 && + strcmp(arguments.args[0] + strlen(arguments.args[0]) - 4, ".zhr") != 0)) { + LOG_ERROR("Not a *.zck or *.zhr file: %s\n", arguments.args[0]); exit(1); } } @@ -141,12 
+149,16 @@ int main (int argc, char *argv[]) { char *out_name = NULL; if(arguments.dict) out_name = calloc(strlen(base_name) + 3, 1); // len .zck -> .zdict = +2 + else if(arguments.header) + out_name = calloc(strlen(base_name) + 1, 1); // .zck -> .zhr, plus NUL else - out_name = calloc(strlen(base_name) - 2, 1); + out_name = calloc(strlen(base_name) - 2, 1); // strip .zck assert(out_name); snprintf(out_name, strlen(base_name) - 3, "%s", base_name); //Strip off .zck if(arguments.dict) snprintf(out_name + strlen(base_name) - 4, 7, ".zdict"); + else if(arguments.header) + snprintf(out_name + strlen(base_name) - 4, 5, ".zhr"); #ifdef _WIN32 int dst_fd = _fileno(stdout); @@ -179,6 +191,9 @@ int main (int argc, char *argv[]) { if(dict_size < 0) { LOG_ERROR("%s", zck_get_error(zck)); goto error2; + } else if(dict_size == 0) { + LOG_ERROR("%s doesn't contain a dictionary\n", arguments.args[0]); + goto error2; } data = calloc(dict_size, 1); assert(data); @@ -208,7 +223,58 @@ int main (int argc, char *argv[]) { } good_exit = true; goto error2; + } else if(arguments.header) { + if(zck_is_detached_header(zck)) { + LOG_ERROR("%s is already a detached header\n", arguments.args[0]); + goto error2; + } + + ssize_t header_size = zck_get_header_length(zck); + if(header_size == -1) { + LOG_ERROR("%s", zck_get_error(zck)); + goto error2; + } + + zckChunk *dict = zck_get_first_chunk(zck); + ssize_t dict_size = zck_get_chunk_comp_size(dict); + if(dict_size < 0) { + LOG_ERROR("%s", zck_get_error(zck)); + goto error2; + } + + data = calloc(BUF_SIZE, 1); + if(data == NULL) { + LOG_ERROR("Unable to allocate %i bytes\n", BUF_SIZE); + goto error2; + } + + if(lseek(src_fd, 5, SEEK_SET) < 0) { + perror("Unable to seek to beginning of source file"); + exit(1); + } + write(dst_fd, "\0ZHR1", 5); + for(ssize_t i=5; i