From 07c4e2bd0f690cda8957f0f0df11b87365a25e4c Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Wed, 21 Mar 2018 15:08:35 +0200 Subject: [PATCH] Initial work in switching to compressed integers in header and index Signed-off-by: Jonathan Dieter --- include/zck.h | 29 +++--- src/lib/comp/comp.c | 4 +- src/lib/compint.c | 97 ++++++++++++++++++++ src/lib/dl/dl.c | 23 +++-- src/lib/dl/range.c | 8 +- src/lib/hash/hash.c | 8 +- src/lib/header.c | 168 ++++++++++++++++++++++++++--------- src/lib/index/index_common.c | 10 ++- src/lib/index/index_create.c | 74 +++++++-------- src/lib/index/index_read.c | 102 +++++++++------------ src/lib/io.c | 21 +++++ src/lib/meson.build | 2 +- src/lib/zck.c | 28 +++--- src/lib/zck_private.h | 28 ++++-- src/unzck.c | 11 +-- src/zck.c | 2 +- src/zck_delta_size.c | 4 +- src/zck_dl.c | 4 +- src/zck_read_header.c | 4 +- 19 files changed, 421 insertions(+), 206 deletions(-) create mode 100644 src/lib/compint.c diff --git a/include/zck.h b/include/zck.h index b141955..efa8f7c 100644 --- a/include/zck.h +++ b/include/zck.h @@ -28,22 +28,22 @@ typedef struct zckIndex { char *digest; int digest_size; int finished; - uint64_t start; + size_t start; size_t length; struct zckIndex *next; } zckIndex; typedef struct zckIndexInfo { - uint64_t count; + size_t count; size_t length; - uint8_t hash_type; - uint8_t digest_size; + int hash_type; + size_t digest_size; zckIndex *first; } zckIndexInfo; typedef struct zckRange { - uint64_t start; - uint64_t end; + size_t start; + size_t end; struct zckRange *next; struct zckRange *prev; } zckRange; @@ -76,9 +76,10 @@ typedef struct zckDL { zckCtx *zck_create(); -void zck_free(zckCtx *zck); +void zck_free(zckCtx **zck); +void zck_clear(zckCtx *zck); int zck_init_write (zckCtx *zck, int dst_fd); -int zck_set_compression_type(zckCtx *zck, uint8_t comp_type); +int zck_set_compression_type(zckCtx *zck, int comp_type); int zck_set_comp_parameter(zckCtx *zck, int option, void *value); int zck_comp_init(zckCtx *zck); int zck_comp_close(zckCtx *zck); @@ -87,12 +88,12 @@ int zck_decompress(zckCtx *zck, const char *src, const size_t src_size, char **dst, size_t *dst_size); int zck_write_file(zckCtx *zck); int zck_read_header(zckCtx *zck, int src_fd); -int64_t zck_get_index_count(zckCtx *zck); +ssize_t zck_get_index_count(zckCtx *zck); zckIndexInfo *zck_get_index(zckCtx *zck); int zck_decompress_to_file (zckCtx *zck, int src_fd, int dst_fd); -int zck_set_full_hash_type(zckCtx *zck, uint8_t hash_type); -int zck_set_chunk_hash_type(zckCtx *zck, uint8_t hash_type); -int64_t zck_get_predata_length(zckCtx *zck); +int zck_set_full_hash_type(zckCtx *zck, int hash_type); +int zck_set_chunk_hash_type(zckCtx *zck, int hash_type); +ssize_t zck_get_header_length(zckCtx *zck); char *zck_get_index_digest(zckCtx *zck); char *zck_get_full_digest(zckCtx *zck); int zck_get_full_digest_size(zckCtx *zck); @@ -100,8 +101,8 @@ int zck_get_chunk_digest_size(zckCtx *zck); int zck_get_full_hash_type(zckCtx *zck); int zck_get_chunk_hash_type(zckCtx *zck); int zck_get_tmp_fd(); -const char *zck_hash_name_from_type(uint8_t hash_type); -const char *zck_comp_name_from_type(uint8_t comp_type); +const char *zck_hash_name_from_type(int hash_type); +const char *zck_comp_name_from_type(int comp_type); int zck_range_calc_segments(zckRangeInfo *info, unsigned int max_ranges); int zck_range_get_need_dl(zckRangeInfo *info, zckCtx *zck_src, zckCtx *zck_tgt); int zck_dl_copy_src_chunks(zckRangeInfo *info, zckCtx *src, zckCtx *tgt); diff --git a/src/lib/comp/comp.c b/src/lib/comp/comp.c index 153f2f8..03ca0b1 100644 --- a/src/lib/comp/comp.c +++ b/src/lib/comp/comp.c @@ -132,7 +132,7 @@ int zck_comp_close(zckCtx *zck) { return True; } -int zck_set_compression_type(zckCtx *zck, uint8_t type) { +int zck_set_compression_type(zckCtx *zck, int type) { zckComp *comp = &(zck->comp); /* Cannot change compression type after compression has started */ @@ -182,7 +182,7 @@ int zck_set_comp_parameter(zckCtx *zck, int option, void *value) { return True; } -const char *zck_comp_name_from_type(uint8_t comp_type) { +const char *zck_comp_name_from_type(int comp_type) { if(comp_type > 1) { snprintf(unknown+8, 4, "%i)", comp_type); return unknown; diff --git a/src/lib/compint.c b/src/lib/compint.c new file mode 100644 index 0000000..924ad15 --- /dev/null +++ b/src/lib/compint.c @@ -0,0 +1,97 @@ +/* + * Copyright 2018 Jonathan Dieter + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "zck_private.h" + +int zck_compint_from_size(char *compint, size_t val, size_t *length) { + for(unsigned char *i = (unsigned char *)compint; ; i++) { + i[0] = val % 128; + val = (val - i[0]) / 128; + (*length)++; + if(val == 0) { + i[0] += 128; + break; + } + } + return True; +} + +int zck_compint_to_size(size_t *val, const char *compint, size_t *length) { + *val = 0; + size_t old_val = 0; + const unsigned char *i = (unsigned char *)compint; + int count = 0; + int done = False; + while(True) { + size_t c = i[0]; + if(c >= 128) { + c -= 128; + done = True; + } + /* There *must* be a more elegant way of doing c * 128**count */ + for(int f=0; f MAX_COMP_SIZE || *val < old_val) { + zck_log(ZCK_LOG_ERROR, "Number too large\n"); + *length -= count; + *val = 0; + return False; + } + old_val = *val; + } + return True; +} + +int zck_compint_from_int(char *compint, int val, size_t *length) { + if(val < 0) { + zck_log(ZCK_LOG_ERROR, "Unable to compress negative integers\n"); + return False; + } + + return zck_compint_from_size(compint, (size_t)val, length); +} + +int zck_compint_to_int(int *val, const char *compint, size_t *length) { + size_t new = (size_t)*val; + if(!zck_compint_to_size(&new, compint, length)) + return False; + *val = (int)new; + if(*val < 0) { + zck_log(ZCK_LOG_ERROR, "Overflow error: compressed int is negative\n"); + return False; + } + return True; +} diff --git a/src/lib/dl/dl.c b/src/lib/dl/dl.c index 6c59ac8..9626250 100644 --- a/src/lib/dl/dl.c +++ b/src/lib/dl/dl.c @@ -60,7 +60,7 @@ void zck_dl_free_dl_regex(zckDL *dl) { /* Write zeros to tgt->fd in location of tgt_idx */ int zck_dl_write_zero(zckCtx *tgt, zckIndex *tgt_idx) { char buf[BUF_SIZE] = {0}; - size_t tgt_data_offset = tgt->preindex_size + tgt->comp_index_size; + size_t tgt_data_offset = tgt->preindex_size + tgt->index_size; size_t to_read = tgt_idx->length; if(!zck_seek(tgt->fd, tgt_data_offset + tgt_idx->start, SEEK_SET)) return False; @@ -154,7 +154,7 @@ int zck_dl_write_range(zckDL *dl, const char *at, size_t length) { return 0; dl->write_in_chunk = idx->length; size_t offset = dl->zck->preindex_size + - dl->zck->comp_index_size; + dl->zck->index_size; if(!zck_seek(dl->dst_fd, offset + tgt_idx->start, SEEK_SET)) return 0; @@ -216,8 +216,8 @@ int zck_dl_write_and_verify(zckRangeInfo *info, zckCtx *src, zckCtx *tgt, zckIndex *src_idx, zckIndex *tgt_idx) { static char buf[BUF_SIZE] = {0}; - size_t src_data_offset = src->preindex_size + src->comp_index_size; - size_t tgt_data_offset = tgt->preindex_size + tgt->comp_index_size; + size_t src_data_offset = src->preindex_size + src->index_size; + size_t tgt_data_offset = tgt->preindex_size + tgt->index_size; size_t to_read = src_idx->length; if(!zck_seek(src->fd, src_data_offset + src_idx->start, SEEK_SET)) return False; @@ -450,22 +450,19 @@ int zck_dl_get_header(zckCtx *zck, zckDL *dl, char *url) { for(int i=0; idst_fd)) + if(!zck_read_ct_is(zck, dl->dst_fd)) return False; - start += 1; - if(!zck_read_index_size(zck, dl->dst_fd)) - return False; - start += sizeof(uint64_t); - zck_log(ZCK_LOG_DEBUG, "Index size: %llu\n", zck->comp_index_size); - if(!zck_dl_bytes(dl, url, zck->comp_index_size, start, + start += 2; + zck_log(ZCK_LOG_DEBUG, "Index size: %llu\n", zck->index_size); + if(!zck_dl_bytes(dl, url, zck->index_size, start, &buffer_len)) return False; if(!zck_read_index(zck, dl->dst_fd)) return False; zckIndexInfo *info = &(dl->info.index); info->hash_type = zck->index.hash_type; - zck_log(ZCK_LOG_DEBUG, "Writing zeros to rest of file: %llu\n", zck->index.length + zck->comp_index_size + start); - if(!zck_zero_bytes(dl, zck->index.length, zck->comp_index_size + start, &buffer_len)) + zck_log(ZCK_LOG_DEBUG, "Writing zeros to rest of file: %llu\n", zck->index.length + zck->index_size + start); + if(!zck_zero_bytes(dl, zck->index.length, zck->index_size + start, &buffer_len)) return False; return True; } diff --git a/src/lib/dl/range.c b/src/lib/dl/range.c index 9216686..c4e6968 100644 --- a/src/lib/dl/range.c +++ b/src/lib/dl/range.c @@ -103,15 +103,15 @@ int zck_range_add(zckRangeInfo *info, zckIndex *idx, zckCtx *zck) { zck_log(ZCK_LOG_ERROR, "zckRangeInfo or zckIndex not allocated\n"); return False; } - size_t predata_len = 0; + size_t header_len = 0; int add_index = False; if(zck) { - predata_len = zck_get_predata_length(zck); + header_len = zck_get_header_length(zck); add_index = True; } - size_t start = idx->start + predata_len; - size_t end = idx->start + predata_len + idx->length - 1; + size_t start = idx->start + header_len; + size_t end = idx->start + header_len + idx->length - 1; zckRange *prev = info->first; for(zckRange *ptr=info->first; ptr;) { prev = ptr; diff --git a/src/lib/hash/hash.c b/src/lib/hash/hash.c index aea1996..673dfc4 100644 --- a/src/lib/hash/hash.c +++ b/src/lib/hash/hash.c @@ -34,7 +34,7 @@ #include "sha1/sha1.h" #include "sha2/sha2.h" -static char unknown[] = "Unknown(\0\0\0\0\0"; +static char unknown[] = "Unknown(\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; static char hash_text[BUF_SIZE] = {0}; const static char *HASH_NAME[] = { @@ -131,7 +131,7 @@ void zck_hash_close(zckHash *hash) { /* Returns 1 if full file hash matches, 0 if it doesn't and -1 if failure */ int zck_hash_check_full_file(zckCtx *zck, int dst_fd) { - if(!zck_seek(dst_fd, zck->preindex_size + zck->comp_index_size, SEEK_SET)) + if(!zck_seek(dst_fd, zck->preindex_size + zck->index_size, SEEK_SET)) return -1; if(!zck_hash_init(&(zck->check_full_hash), &(zck->hash_type))) return -1; @@ -175,9 +175,9 @@ char *zck_hash_finalize(zckHash *hash) { return NULL; } -const char *zck_hash_name_from_type(uint8_t hash_type) { +const char *zck_hash_name_from_type(int hash_type) { if(hash_type > 1) { - snprintf(unknown+8, 4, "%i)", hash_type); + snprintf(unknown+8, 21, "%i)", hash_type); return unknown; } return HASH_NAME[hash_type]; diff --git a/src/lib/header.c b/src/lib/header.c index 25c50fc..aeb0964 100644 --- a/src/lib/header.c +++ b/src/lib/header.c @@ -27,83 +27,141 @@ #include #include #include +#include #include #include "zck_private.h" + int zck_read_initial(zckCtx *zck, int src_fd) { - char header[] = " "; + char *header = zmalloc(5 + MAX_COMP_SIZE); + size_t length = 0; + if(header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", + zck->hash_type.digest_size); + return False; + } - if(!zck_read(src_fd, header, 6)) + zck_log(ZCK_LOG_DEBUG, "Reading magic and hash type\n"); + if(!zck_read(src_fd, header, 5 + MAX_COMP_SIZE)) { + free(header); return False; + } if(memcmp(header, "\0ZCK1", 5) != 0) { + free(header); zck_log(ZCK_LOG_ERROR, "Invalid header, perhaps this is not a zck file?\n"); return False; } + length += 5; - if(!zck_hash_setup(&(zck->hash_type), header[5])) + int hash_type = 0; + if(!zck_compint_to_int(&hash_type, header+length, &length)) return False; - zck->preindex_size = 6; - + if(!zck_hash_setup(&(zck->hash_type), hash_type)) + return False; + if(!zck_seek(src_fd, length, SEEK_SET)) + return False; + zck->header_string = header; + zck->header_size = length; return True; } int zck_read_index_hash(zckCtx *zck, int src_fd) { - char *header; - header = zmalloc(zck->hash_type.digest_size); - if(!header) { + if(zck->header_string == NULL) { + zck_log(ZCK_LOG_ERROR, + "Reading index hash before initial bytes are read\n"); + return False; + } + size_t length = zck->header_size; + char *header = zck->header_string; + zck->header_string = NULL; + zck->header_size = 0; + header = realloc(header, length + zck->hash_type.digest_size); + if(header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to reallocate %lu bytes\n", + length + zck->hash_type.digest_size); + return False; + } + char *digest = zmalloc(zck->hash_type.digest_size); + if(digest == NULL) { zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", zck->hash_type.digest_size); return False; } - if(!zck_read(src_fd, header, zck->hash_type.digest_size)) { + zck_log(ZCK_LOG_DEBUG, "Reading index hash\n"); + if(!zck_read(src_fd, digest, zck->hash_type.digest_size)) { + free(digest); free(header); return False; } - zck->index_digest = header; - zck->preindex_size += zck->hash_type.digest_size; + + /* Set hash to zeros in header string so we can validate it later */ + memset(header + length, 0, zck->hash_type.digest_size); + length += zck->hash_type.digest_size; + zck->index_digest = digest; + zck->header_string = header; + zck->header_size = length; return True; } -int zck_read_comp_type(zckCtx *zck, int src_fd) { - int8_t comp_type; +int zck_read_ct_is(zckCtx *zck, int src_fd) { + if(zck->header_string == NULL) { + zck_log(ZCK_LOG_ERROR, + "Reading compression type before hash type is read\n"); + return False; + } + size_t length = zck->header_size; + char *header = zck->header_string; + zck->header_string = NULL; + zck->header_size = 0; - if(!zck_read(src_fd, (char *)&comp_type, 1)) + header = realloc(header, length + MAX_COMP_SIZE*2); + if(header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", + length + MAX_COMP_SIZE); return False; + } + zck_log(ZCK_LOG_DEBUG, "Reading compression type and index size\n"); + if(!zck_read(src_fd, header + length, MAX_COMP_SIZE*2)) + return False; + + int tmp = 0; - if(!zck_set_compression_type(zck, comp_type)) + /* Read and initialize compression type */ + if(!zck_compint_to_int(&tmp, header + length, &length)) + return False; + if(!zck_set_compression_type(zck, tmp)) return False; if(!zck_comp_init(zck)) return False; - zck->preindex_size += 1; - return True; -} - -int zck_read_index_size(zckCtx *zck, int src_fd) { - uint64_t index_size; - if(!zck_read(src_fd, (char *)&index_size, sizeof(uint64_t))) + /* Read and initialize index size */ + if(!zck_compint_to_int(&tmp, header + length, &length)) return False; + zck->index_size = tmp; - zck->comp_index_size = le64toh(index_size); - zck->preindex_size += sizeof(uint64_t); + if(!zck_seek(src_fd, length, SEEK_SET)) + return False; + zck->header_string = header; + zck->header_size = length; return True; } int zck_read_index(zckCtx *zck, int src_fd) { - char *index = zmalloc(zck->comp_index_size); + char *index = zmalloc(zck->index_size); if(!index) { zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", - zck->comp_index_size); + zck->index_size); return False; } - if(!zck_read(src_fd, index, zck->comp_index_size)) { + zck_log(ZCK_LOG_DEBUG, "Reading index\n"); + if(!zck_read(src_fd, index, zck->index_size)) { free(index); return False; } - if(!zck_index_read(zck, index, zck->comp_index_size)) { + if(!zck_index_read(zck, index, zck->index_size)) { free(index); return False; } @@ -121,28 +179,58 @@ int zck_read_header(zckCtx *zck, int src_fd) { return False; if(!zck_read_index_hash(zck, src_fd)) return False; - if(!zck_read_comp_type(zck, src_fd)) - return False; - if(!zck_read_index_size(zck, src_fd)) + if(!zck_read_ct_is(zck, src_fd)) return False; if(!zck_read_index(zck, src_fd)) return False; return True; } -int zck_write_header(zckCtx *zck) { - uint64_t index_size; - - if(!zck_write(zck->fd, "\0ZCK1", 5)) +int zck_header_create(zckCtx *zck) { + int header_malloc = 5 + MAX_COMP_SIZE + zck->hash_type.digest_size + + MAX_COMP_SIZE*2; + char *header = zmalloc(header_malloc); + if(header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", header_malloc); return False; - if(!zck_write(zck->fd, (const char *)&(zck->hash_type.type), 1)) + } + size_t length = 0; + memcpy(header+length, "\0ZCK1", 5); + length += 5; + if(!zck_compint_from_size(header+length, zck->hash_type.type, &length)) { + free(header); return False; - if(!zck_write(zck->fd, zck->index_digest, zck->hash_type.digest_size)) + } + + /* If we have the digest, write it in, otherwise write zeros */ + if(zck->index_digest) + memcpy(header+length, zck->index_digest, zck->hash_type.digest_size); + else + memset(header+length, 0, zck->hash_type.digest_size); + length += zck->hash_type.digest_size; + + if(!zck_compint_from_int(header+length, zck->comp.type, &length)) { + free(header); + return False; + } + if(!zck_compint_from_size(header+length, zck->index_size, &length)) { + free(header); return False; - if(!zck_write(zck->fd, (const char *)&(zck->comp.type), 1)) + } + header = realloc(header, length); + if(header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to reallocate %lu bytes\n", length); return False; - index_size = htole64(zck->comp_index_size); - if(!zck_write(zck->fd, (const char *)&index_size, sizeof(uint64_t))) + } + if(zck->header_string) + free(zck->header_string); + zck->header_string = header; + zck->header_size = length; + return True; +} + +int zck_write_header(zckCtx *zck) { + if(!zck_write(zck->fd, zck->header_string, zck->header_size)) return False; return True; } diff --git a/src/lib/index/index_common.c b/src/lib/index/index_common.c index 458b2ae..41eb9c3 100644 --- a/src/lib/index/index_common.c +++ b/src/lib/index/index_common.c @@ -59,9 +59,13 @@ void zck_index_free(zckCtx *zck) { free(zck->full_hash.ctx); zck->full_hash.ctx = NULL; } - if(zck->comp_index) { - free(zck->comp_index); - zck->comp_index = NULL; + if(zck->index_string) { + free(zck->index_string); + zck->index_string = NULL; + } + if(zck->header_string) { + free(zck->header_string); + zck->header_string = NULL; } if(zck->index_digest) { free(zck->index_digest); diff --git a/src/lib/index/index_create.c b/src/lib/index/index_create.c index 4463d00..f349463 100644 --- a/src/lib/index/index_create.c +++ b/src/lib/index/index_create.c @@ -34,72 +34,69 @@ int zck_index_finalize(zckCtx *zck) { zckHash index_hash; char *index; - char *index_loc; + size_t index_malloc = 0; size_t index_size = 0; - uint64_t index_count = 0; zck->full_hash_digest = zck_hash_finalize(&(zck->full_hash)); if(zck->full_hash_digest == NULL) return False; - index_size = 1; // Chunk hash type; - index_size += sizeof(uint64_t); // Number of index entries - index_size += zck->hash_type.digest_size; // Full hash digest + index_malloc = MAX_COMP_SIZE * 2; // Chunk hash type and # of index entries + index_malloc += zck->hash_type.digest_size; // Full hash digest - /* Add digest size + 8 bytes for end location for each entry in index */ + /* Add digest size + MAX_COMP_SIZE bytes for length of each entry in + * index */ if(zck->index.first) { zckIndex *tmp = zck->index.first; while(tmp) { - index_size += zck->index.digest_size + sizeof(uint64_t); + index_malloc += zck->index.digest_size + MAX_COMP_SIZE; tmp = tmp->next; } } /* Write index */ - index = zmalloc(index_size); - index_loc = index; - memcpy(index_loc, &(zck->index.hash_type), 1); - index_loc += 1; - index_count = htole64(zck->index.count); - memcpy(index_loc, &index_count, sizeof(uint64_t)); - index_loc += sizeof(uint64_t); - memcpy(index_loc, zck->full_hash_digest, zck->hash_type.digest_size); - index_loc += zck->hash_type.digest_size; + index = zmalloc(index_malloc); + zck_compint_from_size(index+index_size, zck->index.hash_type, &index_size); + zck_compint_from_size(index+index_size, zck->index.count, &index_size); + memcpy(index+index_size, zck->full_hash_digest, zck->hash_type.digest_size); + index_size += zck->hash_type.digest_size; if(zck->index.first) { zckIndex *tmp = zck->index.first; while(tmp) { - uint64_t end = htole64(tmp->start + tmp->length); - memcpy(index_loc, tmp->digest, zck->index.digest_size); - index_loc += zck->index.hash_type; - memcpy(index_loc, &end, sizeof(uint64_t)); - index_loc += sizeof(uint64_t); + memcpy(index+index_size, tmp->digest, zck->index.digest_size); + index_size += zck->index.digest_size; + zck_compint_from_size(index+index_size, tmp->length, &index_size); tmp = tmp->next; } } - - if(!zck->comp.compress(&zck->comp, index, index_size, &(zck->comp_index), - &(zck->comp_index_size), 0)) { - free(index); + /* Shrink index to actual size */ + index = realloc(index, index_size); + if(index == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to reallocate %lu bytes\n", index_size); return False; } - index_size = htole64((uint64_t) zck->comp_index_size); + zck->index_string = index; + zck->index_size = index_size; - /* Calculate hash of index, including compressed size at beginning */ - if(!zck_hash_init(&index_hash, &(zck->hash_type))) { - free(index); - return False; + /* Rebuild header with index hash set to zeros */ + if(zck->index_digest) { + free(zck->index_digest); + zck->index_digest = NULL; } - if(!zck_hash_update(&index_hash, (const char *)&(zck->comp.type), 1)) { + if(!zck_header_create(zck)) + return False; + + /* Calculate hash of header */ + if(!zck_hash_init(&index_hash, &(zck->hash_type))) { free(index); return False; } - if(!zck_hash_update(&index_hash, (const char *)&index_size, - sizeof(uint64_t))) { + if(!zck_hash_update(&index_hash, zck->header_string, zck->header_size)) { free(index); return False; } - if(!zck_hash_update(&index_hash, zck->comp_index, zck->comp_index_size)) { + if(!zck_hash_update(&index_hash, zck->index_string, zck->index_size)) { free(index); return False; } @@ -110,7 +107,11 @@ int zck_index_finalize(zckCtx *zck) { zck_hash_name_from_type(zck->hash_type.type)); return False; } - free(index); + + /* Rebuild header string with calculated index hash */ + if(!zck_header_create(zck)) + return False; + return True; } @@ -184,10 +185,11 @@ int zck_index_add_chunk(zckCtx *zck, char *data, size_t size) { if(!zck_index_new_chunk(&(zck->index), digest, zck->index.digest_size, size, True)) return False; + free(digest); } return True; } int zck_write_index(zckCtx *zck) { - return zck_write(zck->fd, zck->comp_index, zck->comp_index_size); + return zck_write(zck->fd, zck->index_string, zck->index_size); } diff --git a/src/lib/index/index_read.c b/src/lib/index/index_read.c index f4115ac..8622b0e 100644 --- a/src/lib/index/index_read.c +++ b/src/lib/index/index_read.c @@ -33,22 +33,15 @@ #include "zck_private.h" int zck_index_read(zckCtx *zck, char *data, size_t size) { - zckHash index_hash; - char *digest; - uint64_t index_size; - uint64_t index_count; - char *dst = NULL; - size_t dst_size = 0; - char *cur_loc; + zckHash index_hash = {0}; + char *digest = NULL; + size_t length = 0; /* Check that index checksum matches stored checksum */ - zck_log(ZCK_LOG_DEBUG, "Reading index size\n"); - index_size = htole64(size); + zck_log(ZCK_LOG_DEBUG, "Calculating index checksum\n"); if(!zck_hash_init(&index_hash, &(zck->hash_type))) return False; - if(!zck_hash_update(&index_hash, (const char *)&(zck->comp.type), 1)) - return False; - if(!zck_hash_update(&index_hash, (const char *)&index_size, sizeof(uint64_t))) + if(!zck_hash_update(&index_hash, zck->header_string, zck->header_size)) return False; if(!zck_hash_update(&index_hash, data, size)) return False; @@ -65,84 +58,77 @@ int zck_index_read(zckCtx *zck, char *data, size_t size) { zck_log(ZCK_LOG_ERROR, "Index fails checksum test\n"); return False; } + zck_log(ZCK_LOG_DEBUG, "Checksum is valid\n"); free(digest); - zck_log(ZCK_LOG_DEBUG, "Decompressing index\n"); - if(!zck_decompress(zck, data, size, &dst, &dst_size)) { - zck_log(ZCK_LOG_ERROR, "Unable to decompress index\n"); - return False; - } /* Make sure there's at least enough data for full digest and index count */ - if(dst_size < zck->hash_type.digest_size + sizeof(uint64_t) + 1) { + if(size < zck->hash_type.digest_size + MAX_COMP_SIZE*2) { zck_log(ZCK_LOG_ERROR, "Index is too small to read\n"); - if(dst) - free(dst); return False; } - zckIndex *prev = zck->index.first; + /* Read and configure hash type */ + int hash_type; + if(!zck_compint_to_int(&hash_type, data + length, &length)) + return False; + if(!zck_set_chunk_hash_type(zck, hash_type)) + return False; + + /* Read number of index entries */ + size_t index_count; + if(!zck_compint_to_size(&index_count, data + length, &length)) + return False; + zck->index.count = index_count; + + /* Read full data hash */ zck->full_hash_digest = zmalloc(zck->hash_type.digest_size); if(!zck->full_hash_digest) { - if(dst) - free(dst); zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", zck->hash_type.digest_size); return False; } - uint8_t hash_type; - memcpy(&hash_type, dst, 1); - if(!zck_set_chunk_hash_type(zck, hash_type)) { - if(dst) - free(dst); - return False; - } + memcpy(zck->full_hash_digest, data + length, zck->hash_type.digest_size); + length += zck->hash_type.digest_size; - if((dst_size - (zck->hash_type.digest_size + sizeof(uint64_t)+ 1)) % - (zck->index.digest_size + sizeof(uint64_t)) != 0) { - zck_log(ZCK_LOG_ERROR, "Index size is invalid\n"); - if(dst) - free(dst); - return False; - } - cur_loc = dst + 1; - memcpy(&index_count, cur_loc, sizeof(uint64_t)); - zck->index.count = le64toh(index_count); - cur_loc += sizeof(uint64_t); - memcpy(zck->full_hash_digest, cur_loc, zck->hash_type.digest_size); - cur_loc += zck->hash_type.digest_size; - uint64_t prev_loc = 0; - while(cur_loc < dst + dst_size) { + zckIndex *prev = zck->index.first; + size_t idx_loc = 0; + while(length < size) { zckIndex *new = zmalloc(sizeof(zckIndex)); if(!new) { zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", sizeof(zckIndex)); return False; } - uint64_t end = 0; + /* Read index entry digest */ new->digest = zmalloc(zck->index.digest_size); if(!new->digest) { zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", zck->index.digest_size); return False; } - memcpy(new->digest, cur_loc, zck->index.digest_size); + memcpy(new->digest, data+length, zck->index.digest_size); new->digest_size = zck->index.digest_size; - cur_loc += zck->index.digest_size; - memcpy(&end, cur_loc, sizeof(uint64_t)); - new->start = prev_loc; - new->length = le64toh(end) - prev_loc; + length += zck->index.digest_size; + + /* Read and store entry length */ + size_t chunk_length = 0; + if(!zck_compint_to_size(&chunk_length, data+length, &length)) + return False; + new->start = idx_loc; + new->length = chunk_length; new->finished = False; - prev_loc = le64toh(end); - zck->index.length += new->length; - cur_loc += sizeof(uint64_t); - if(prev) { + + idx_loc += chunk_length; + zck->index.length = idx_loc; + + if(prev) prev->next = new; - } else { + else zck->index.first = new; - } prev = new; } - free(dst); + free(zck->index_string); + zck->index_string = NULL; return True; } diff --git a/src/lib/io.c b/src/lib/io.c index 880be94..900b733 100644 --- a/src/lib/io.c +++ b/src/lib/io.c @@ -69,6 +69,27 @@ int zck_write(int fd, const char *data, size_t length) { return True; } +int zck_write_comp_size(int fd, size_t val) { + char data[sizeof(size_t)*2] = {0}; + size_t length = 0; + if(!zck_compint_from_size(data, val, &length)) + return False; + return zck_write(fd, data, length); +} + +int zck_read_comp_size(int fd, size_t *val, size_t *length) { + char data[MAX_COMP_SIZE] = {0}; + int i=0; + for(char c=zck_read(fd, data+i, 1); c < 128 && i < MAX_COMP_SIZE; + i++,c=zck_read(fd, data+i, 1)); + if(i == MAX_COMP_SIZE && data[i] < 128) { + zck_log(ZCK_LOG_ERROR, "Number too large\n"); + *val = 0; + return False; + } + return !zck_compint_to_size(val, data, length); +} + int zck_seek(int fd, off_t offset, int whence) { if(lseek(fd, offset, whence) == -1) { char *wh_str = NULL; diff --git a/src/lib/meson.build b/src/lib/meson.build index 0ec7488..86876db 100644 --- a/src/lib/meson.build +++ b/src/lib/meson.build @@ -3,7 +3,7 @@ subdir('comp') subdir('hash') subdir('index') subdir('dl') -sources += ['zck.c', 'header.c', 'io.c', 'log.c'] +sources += ['zck.c', 'header.c', 'io.c', 'log.c', 'compint.c'] zcklib = shared_library('zck', sources, include_directories: inc, diff --git a/src/lib/zck.c b/src/lib/zck.c index bc47aa9..9386ac7 100644 --- a/src/lib/zck.c +++ b/src/lib/zck.c @@ -62,7 +62,7 @@ int zck_write_file(zckCtx *zck) { return True; } -void zck_free(zckCtx *zck) { +void zck_clear(zckCtx *zck) { if(zck == NULL) return; zck_index_free(zck); @@ -81,7 +81,14 @@ void zck_free(zckCtx *zck) { close(zck->temp_fd); zck->temp_fd = 0; } - free(zck); +} + +void zck_free(zckCtx **zck) { + if(*zck == NULL) + return; + zck_clear(*zck); + free(*zck); + *zck = NULL; } zckCtx *zck_create() { @@ -94,7 +101,7 @@ zckCtx *zck_create() { return zck; } -int zck_set_full_hash_type(zckCtx *zck, uint8_t hash_type) { +int zck_set_full_hash_type(zckCtx *zck, int hash_type) { VALIDATE(zck); zck_log(ZCK_LOG_INFO, "Setting full hash to %s\n", zck_hash_name_from_type(hash_type)); @@ -111,7 +118,7 @@ int zck_set_full_hash_type(zckCtx *zck, uint8_t hash_type) { return True; } -int zck_set_chunk_hash_type(zckCtx *zck, uint8_t hash_type) { +int zck_set_chunk_hash_type(zckCtx *zck, int hash_type) { VALIDATE(zck); memset(&(zck->chunk_hash_type), 0, sizeof(zckHashType)); zck_log(ZCK_LOG_INFO, "Setting chunk hash to %s\n", @@ -150,7 +157,7 @@ int zck_get_chunk_hash_type(zckCtx *zck) { return zck->index.hash_type; } -int64_t zck_get_index_count(zckCtx *zck) { +ssize_t zck_get_index_count(zckCtx *zck) { if(zck == NULL) return -1; return zck->index.count; @@ -174,10 +181,10 @@ char *zck_get_full_digest(zckCtx *zck) { return zck->full_hash_digest; } -int64_t zck_get_predata_length(zckCtx *zck) { +ssize_t zck_get_header_length(zckCtx *zck) { if(zck == NULL) return -1; - return zck->preindex_size + zck->comp_index_size; + return zck->header_size + zck->index_size; } int zck_get_tmp_fd() { @@ -216,7 +223,7 @@ int zck_get_tmp_fd() { int zck_init_write (zckCtx *zck, int dst_fd) { VALIDATE(zck); - zck_free(zck); + zck_clear(zck); memset(zck, 0, sizeof(zckCtx)); zck->temp_fd = zck_get_tmp_fd(); @@ -347,11 +354,8 @@ int zck_decompress_to_file(zckCtx *zck, int src_fd, int dst_fd) { zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", csize); return False; } - if(lseek(src_fd, start + idx->start, SEEK_SET) == -1) { - zck_log(ZCK_LOG_ERROR, "Unable to seek in source file: %s\n", - strerror(errno)); + if(!zck_seek(src_fd, start + idx->start, SEEK_SET)) return False; - } if(!zck_read(src_fd, cdata, csize)) { free(cdata); zck_log(ZCK_LOG_ERROR, "Error reading chunk %i\n", count); diff --git a/src/lib/zck_private.h b/src/lib/zck_private.h index 0b73675..2fbec6f 100644 --- a/src/lib/zck_private.h +++ b/src/lib/zck_private.h @@ -6,6 +6,8 @@ #include #define BUF_SIZE 32768 +/* Maximum string length for a compressed size_t */ +#define MAX_COMP_SIZE (((sizeof(size_t) * 8) / 7) + 1) #define zmalloc(x) calloc(1, x) @@ -24,7 +26,7 @@ typedef int (*fcclose)(struct zckComp *comp); typedef enum log_type log_type; typedef struct zckHashType { - uint8_t type; + int type; int digest_size; } zckHashType; @@ -78,8 +80,10 @@ typedef struct zckCtx { size_t preindex_size; char *full_hash_digest; - char *comp_index; - size_t comp_index_size; + char *header_string; + size_t header_size; + char *index_string; + size_t index_size; zckIndexInfo index; char *index_digest; zckHash full_hash; @@ -89,7 +93,7 @@ typedef struct zckCtx { zckHashType chunk_hash_type; } zckCtx; -const char *zck_hash_name_from_type(uint8_t hash_type); +const char *zck_hash_name_from_type(int hash_type); int zck_get_tmp_fd(); int zck_validate_file(zckCtx *zck); @@ -98,7 +102,7 @@ int zck_comp_init(zckCtx *zck); int zck_compress(zckCtx *zck, const char *src, const size_t src_size); int zck_decompress(zckCtx *zck, const char *src, const size_t src_size, char **dst, size_t *dst_size); int zck_comp_close(zckCtx *zck); -int zck_set_compression_type(zckCtx *zck, uint8_t type); +int zck_set_compression_type(zckCtx *zck, int type); int zck_set_comp_parameter(zckCtx *zck, int option, void *value); /* hash/hash.h */ @@ -123,14 +127,16 @@ int zck_write_index(zckCtx *zck); int zck_seek(int fd, off_t offset, int whence); int zck_read(int fd, char *data, size_t length); int zck_write(int fd, const char *data, size_t length); +int zck_write_comp_size(int fd, size_t val); +int zck_read_comp_size(int fd, size_t *val, size_t *length); int zck_chunks_from_temp(zckCtx *zck); /* header.c */ int zck_read_initial(zckCtx *zck, int src_fd); int zck_read_index_hash(zckCtx *zck, int src_fd); -int zck_read_comp_type(zckCtx *zck, int src_fd); -int zck_read_index_size(zckCtx *zck, int src_fd); +int zck_read_ct_is(zckCtx *zck, int src_fd); int zck_read_index(zckCtx *zck, int src_fd); +int zck_header_create(zckCtx *zck); int zck_read_header(zckCtx *zck, int src_fd); int zck_write_header(zckCtx *zck); @@ -145,6 +151,14 @@ size_t zck_multipart_get_boundary(zckDL *dl, char *b, size_t size); /* dl/dl.c */ int zck_dl_write_range(zckDL *dl, const char *at, size_t length); +/* compint.c */ +int zck_compint_from_int(char *compint, int val, size_t *length); +int zck_compint_from_size(char *compint, size_t val, size_t *length); +int zck_compint_to_int(int *val, const char *compint, size_t *length); +int zck_compint_to_size(size_t *val, const char *compint, size_t *length); + + + /* log.c */ void zck_log(log_type lt, const char *format, ...); #endif diff --git a/src/unzck.c b/src/unzck.c index bffce2f..04029e5 100644 --- a/src/unzck.c +++ b/src/unzck.c @@ -35,10 +35,7 @@ #include int main (int argc, char *argv[]) { - zckCtx *zck = zck_create(); char *out_name; - if(zck == NULL) - exit(1); zck_set_log_level(ZCK_LOG_DEBUG); @@ -65,16 +62,20 @@ int main (int argc, char *argv[]) { exit(1); } + zckCtx *zck = zck_create(); + if(zck == NULL) + exit(1); if(!zck_decompress_to_file(zck, src_fd, dst_fd)) { unlink(out_name); free(out_name); close(src_fd); close(dst_fd); - zck_free(zck); + zck_free(&zck); + exit(1); } free(out_name); close(src_fd); close(dst_fd); - zck_free(zck); + zck_free(&zck); } diff --git a/src/zck.c b/src/zck.c index d995d0c..3cd1e47 100644 --- a/src/zck.c +++ b/src/zck.c @@ -207,6 +207,6 @@ int main (int argc, char *argv[]) { } if(!zck_write_file(zck)) exit(1); - zck_free(zck); + zck_free(&zck); close(dst_fd); } diff --git a/src/zck_delta_size.c b/src/zck_delta_size.c index efd6228..17a48ea 100644 --- a/src/zck_delta_size.c +++ b/src/zck_delta_size.c @@ -109,6 +109,6 @@ int main (int argc, char *argv[]) { } printf("Would download %i of %i bytes\n", dl_size, total_size); printf("Matched %i of %lu chunks\n", matched_chunks, zck_get_index_count(zck_tgt)); - zck_free(zck_tgt); - zck_free(zck_src); + zck_free(&zck_tgt); + zck_free(&zck_src); } diff --git a/src/zck_dl.c b/src/zck_dl.c index e8d1742..fca3c95 100644 --- a/src/zck_dl.c +++ b/src/zck_dl.c @@ -106,8 +106,8 @@ int main (int argc, char *argv[]) { break; } zck_dl_free(&dl); - zck_free(zck_tgt); - zck_free(zck_src); + zck_free(&zck_tgt); + zck_free(&zck_src); zck_dl_global_cleanup(); exit(0); } diff --git a/src/zck_read_header.c b/src/zck_read_header.c index 3cd8989..26837fe 100644 --- a/src/zck_read_header.c +++ b/src/zck_read_header.c @@ -75,9 +75,9 @@ int main (int argc, char *argv[]) { while(idx) { for(int i=0; idigest[i]); - printf(" %12lu %12lu\n", idx->start + zck_get_predata_length(zck), idx->length); + printf(" %12lu %12lu\n", idx->start + zck_get_header_length(zck), idx->length); idx = idx->next; } - zck_free(zck); + zck_free(&zck); } -- 2.30.2