From 2f66f9bd1ec5aae3ab790c8cb0ff5310120e596e Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Sat, 8 Jan 2022 12:25:00 +0100 Subject: [PATCH] add newlines, fix up zck_dl, other review changes --- .github/environment.yml | 2 +- src/zck.c | 7 +- src/zck_delta_size.c | 2 +- src/zck_dl.c | 454 ++++++++++++++++++++++++++++++++-------- src/zck_read_header.c | 10 +- test/copy_chunks.c | 2 +- test/empty.c | 2 +- test/shacheck.c | 2 +- test/zck_cmp_uncomp.c | 2 +- 9 files changed, 381 insertions(+), 102 deletions(-) diff --git a/.github/environment.yml b/.github/environment.yml index 92d7114..8d919cb 100644 --- a/.github/environment.yml +++ b/.github/environment.yml @@ -10,4 +10,4 @@ dependencies: - pkg-config - openssl - sel(osx): argp-standalone -- sel(win): cmake \ No newline at end of file +- sel(win): cmake diff --git a/src/zck.c b/src/zck.c index 151e208..87c0811 100644 --- a/src/zck.c +++ b/src/zck.c @@ -172,8 +172,7 @@ int main (int argc, char *argv[]) { char *base_name = NULL; char *out_name = NULL; if(arguments.output == NULL) { - // base_name = basename(arguments.args[0]); - base_name = arguments.args[0]; + base_name = basename(arguments.args[0]); out_name = malloc(strlen(base_name) + 5); assert(out_name); snprintf(out_name, strlen(base_name) + 5, "%s.zck", base_name); @@ -191,7 +190,7 @@ int main (int argc, char *argv[]) { int dict_fd = open(arguments.dict, O_RDONLY | O_BINARY); if(dict_fd < 0) { LOG_ERROR("Unable to open dictionary %s for reading", - arguments.dict); + arguments.dict); perror(""); exit(1); } @@ -353,4 +352,4 @@ int main (int argc, char *argv[]) { zck_free(&zck); close(dst_fd); -} \ No newline at end of file +} diff --git a/src/zck_delta_size.c b/src/zck_delta_size.c index a52b4b7..65f3c8d 100644 --- a/src/zck_delta_size.c +++ b/src/zck_delta_size.c @@ -197,4 +197,4 @@ int main (int argc, char *argv[]) { (long unsigned)zck_get_chunk_count(zck_tgt)); zck_free(&zck_tgt); zck_free(&zck_src); -} \ No newline at end of file +} diff --git a/src/zck_dl.c b/src/zck_dl.c index a52b4b7..44f9e06 100644 --- a/src/zck_dl.c +++ b/src/zck_dl.c @@ -1,5 +1,5 @@ /* - * Copyright 2018 Jonathan Dieter + * Copyright 2018, 2020 Jonathan Dieter * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -24,6 +24,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#define _GNU_SOURCE + #include #include #include @@ -32,27 +34,43 @@ #include #include #include +#include #include +#include #include #include +#include #include "util_common.h" -static char doc[] = "zck_delta_size - Calculate the difference between" - " two zchunk files"; +static char doc[] = "zckdl - Download zchunk file"; -static char args_doc[] = " "; +static char args_doc[] = ""; static struct argp_option options[] = { - {"verbose", 'v', 0, 0, - "Increase verbosity (can be specified more than once for debugging)"}, - {"version", 'V', 0, 0, "Show program version"}, + {"verbose", 'v', 0, 0, "Increase verbosity"}, + {"quiet", 'q', 0, 0, + "Only show warnings (can be specified twice to only show errors)"}, + {"source", 's', "FILE", 0, "File to use as delta source"}, + {"fail-no-ranges", 1000, 0, 0, + "If server doesn't support ranges, fail instead of downloading full file"}, + {"version", 'V', 0, 0, "Show program version"}, { 0 } }; +static int range_attempt[] = { + 255, + 127, + 7, + 2, + 1 +}; + struct arguments { - char *args[2]; + char *args[1]; zck_log_type log_level; + char *source; + int fail_no_ranges; bool exit; }; @@ -64,17 +82,31 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state) { switch (key) { case 'v': + if(arguments->log_level > ZCK_LOG_INFO) + arguments->log_level = ZCK_LOG_INFO; arguments->log_level--; if(arguments->log_level < ZCK_LOG_DDEBUG) arguments->log_level = ZCK_LOG_DDEBUG; break; + case 'q': + if(arguments->log_level < ZCK_LOG_INFO) + arguments->log_level = ZCK_LOG_INFO; + arguments->log_level += 1; + if(arguments->log_level > ZCK_LOG_NONE) + arguments->log_level = ZCK_LOG_NONE; + break; + case 's': + arguments->source = arg; + break; case 'V': version(); arguments->exit = true; break; - + case 1000: + arguments->fail_no_ranges = 1; + break; case ARGP_KEY_ARG: - if (state->arg_num >= 2) { + if (state->arg_num >= 1) { argp_usage (state); return EINVAL; } @@ -83,7 +115,7 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state) { break; case ARGP_KEY_END: - if (state->arg_num < 2) { + if (state->arg_num < 1) { argp_usage (state); return EINVAL; } @@ -97,104 +129,352 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state) { static struct argp argp = {options, parse_opt, args_doc, doc}; +typedef struct dlCtx { + CURL *curl; + zckDL *dl; + int fail_no_ranges; + int range_fail; + int max_ranges; +} dlCtx; + +/* Fail if dl_ctx->fail_no_ranges is set and we get a 200 response */ +size_t dl_header_cb(char *b, size_t l, size_t c, void *dl_v) { + dlCtx *dl_ctx = (dlCtx*)dl_v; + if(dl_ctx->fail_no_ranges) { + long code = -1; + curl_easy_getinfo(dl_ctx->curl, CURLINFO_RESPONSE_CODE, &code); + if(code == 200) { + dl_ctx->range_fail = 1; + return 0; + } + } + return zck_header_cb(b, l, c, dl_ctx->dl); +} + +/* Return 0 on error, -1 on 200 response (if dl_ctx->fail_no_ranges), + * and 1 on complete success */ +int dl_range(dlCtx *dl_ctx, char *url, char *range, int is_chunk) { + if(dl_ctx == NULL || dl_ctx->dl == NULL) { + free(range); + LOG_ERROR("Struct not defined\n"); + return 0; + } + + CURL *curl = dl_ctx->curl; + CURLcode res; + + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, dl_header_cb); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, dl_ctx); + if(is_chunk) + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, zck_write_chunk_cb); + else + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, zck_write_zck_header_cb); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, dl_ctx->dl); + curl_easy_setopt(curl, CURLOPT_RANGE, range); + res = curl_easy_perform(curl); + free(range); + + if(dl_ctx->range_fail) + return -1; + + if(res != CURLE_OK) { + LOG_ERROR("Download failed: %s\n", + curl_easy_strerror(res)); + return 0; + } + long code; + curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &code); + if (code != 206 && code != 200) { + LOG_ERROR("HTTP Error: %li when downloading %s\n", code, + url); + return 0; + } + + + return 1; +} + + +int dl_byte_range(dlCtx *dl_ctx, char *url, int start, int end) { + char *range = NULL; + zck_dl_reset(dl_ctx->dl); + if(start > -1 && end > -1) + range = zck_get_range(start, end); + return dl_range(dl_ctx, url, range, 0); +} + +int dl_bytes(dlCtx *dl_ctx, char *url, size_t bytes, size_t start, + size_t *buffer_len, int log_level) { + if(start + bytes > *buffer_len) { + zckDL *dl = dl_ctx->dl; + + int fd = zck_get_fd(zck_dl_get_zck(dl)); + + if(lseek(fd, *buffer_len, SEEK_SET) == -1) { + LOG_ERROR("Seek to download location failed: %s\n", + strerror(errno)); + return 0; + } + if(*buffer_len >= start + bytes) + return 1; + + int retval = dl_byte_range(dl_ctx, url, *buffer_len, + (start + bytes) - 1); + if(retval < 1) + return retval; + + if(log_level <= ZCK_LOG_DEBUG) + LOG_ERROR("Downloading %lu bytes at position %lu\n", + (unsigned long)start+bytes-*buffer_len, + (unsigned long)*buffer_len); + *buffer_len += start + bytes - *buffer_len; + if(lseek(fd, start, SEEK_SET) == -1) { + LOG_ERROR("Seek to byte %lu of temporary file failed: %s\n", + (unsigned long)start, strerror(errno)); + return 0; + } + } + return 1; +} + +int dl_header(CURL *curl, zckDL *dl, char *url, int fail_no_ranges, + int log_level) { + size_t buffer_len = 0; + size_t start = 0; + + dlCtx dl_ctx = {0}; + dl_ctx.fail_no_ranges = 1; + dl_ctx.dl = dl; + dl_ctx.curl = curl; + dl_ctx.max_ranges = 1; + + /* Download minimum download size and read magic and hash type */ + int retval = dl_bytes(&dl_ctx, url, zck_get_min_download_size(), start, + &buffer_len, log_level); + if(retval < 1) + return retval; + + zckCtx *zck = zck_dl_get_zck(dl); + if(zck == NULL) + return 0; + + if(!zck_read_lead(zck)) + return 0; + start = zck_get_lead_length(zck); + if(!dl_bytes(&dl_ctx, url, zck_get_header_length(zck) - start, + start, &buffer_len, log_level)) + return 0; + if(!zck_read_header(zck)) + return 0; + return 1; +} + int main (int argc, char *argv[]) { struct arguments arguments = {0}; /* Defaults */ - arguments.log_level = ZCK_LOG_ERROR; + arguments.log_level = ZCK_LOG_INFO; - int retval = argp_parse(&argp, argc, argv, 0, 0, &arguments); + int retval = argp_parse (&argp, argc, argv, 0, 0, &arguments); if(retval || arguments.exit) exit(retval); + curl_global_init(CURL_GLOBAL_ALL); + zck_set_log_level(arguments.log_level); - int src_fd = open(arguments.args[0], O_RDONLY | O_BINARY); - if(src_fd < 0) { - LOG_ERROR("Unable to open %s\n", arguments.args[0]); - perror(""); - exit(1); - } - zckCtx *zck_src = zck_create(); - if(zck_src == NULL) { - LOG_ERROR("%s", zck_get_error(NULL)); - zck_clear_error(NULL); - exit(1); + zckCtx *zck_src = NULL; + if(arguments.source) { + int src_fd = open(arguments.source, O_RDONLY); + if(src_fd < 0) { + LOG_ERROR("Unable to open %s\n", arguments.source); + perror(""); + exit(10); + } + zck_src = zck_create(); + if(zck_src == NULL) { + LOG_ERROR("%s", zck_get_error(NULL)); + zck_clear_error(NULL); + exit(10); + } + if(!zck_init_read(zck_src, src_fd)) { + LOG_ERROR("Unable to open %s: %s", arguments.source, + zck_get_error(zck_src)); + exit(10); + } } - if(!zck_init_read(zck_src, src_fd)) { - LOG_ERROR("Error reading %s: %s", arguments.args[0], - zck_get_error(zck_src)); - zck_free(&zck_src); - exit(1); + + CURL *curl_ctx = curl_easy_init(); + if(!curl_ctx) { + LOG_ERROR("Unable to allocate %lu bytes for curl context\n", + (unsigned long)sizeof(CURL)); + exit(10); } - close(src_fd); - int tgt_fd = open(arguments.args[1], O_RDONLY | O_BINARY); - if(tgt_fd < 0) { - LOG_ERROR("Unable to open %s\n", arguments.args[1]); - perror(""); - zck_free(&zck_src); - exit(1); + char *outname = basename(arguments.args[0]); + int dst_fd = open(outname, O_RDWR | O_CREAT, 0666); + if(dst_fd < 0) { + LOG_ERROR("Unable to open %s: %s\n", outname, + strerror(errno)); + exit(10); } zckCtx *zck_tgt = zck_create(); if(zck_tgt == NULL) { LOG_ERROR("%s", zck_get_error(NULL)); zck_clear_error(NULL); - zck_free(&zck_src); - exit(1); - } - if(!zck_init_read(zck_tgt, tgt_fd)) { - LOG_ERROR("Error reading %s: %s", arguments.args[1], - zck_get_error(zck_tgt)); - zck_free(&zck_src); - zck_free(&zck_tgt); - exit(1); - } - close(tgt_fd); - - if(zck_get_chunk_hash_type(zck_tgt) != zck_get_chunk_hash_type(zck_src)) { - LOG_ERROR("ERROR: Chunk hash types don't match:\n"); - LOG_ERROR(" %s: %s\n", arguments.args[0], - zck_hash_name_from_type(zck_get_chunk_hash_type(zck_tgt))); - LOG_ERROR(" %s: %s\n", arguments.args[1], - zck_hash_name_from_type(zck_get_chunk_hash_type(zck_src))); - zck_free(&zck_src); - zck_free(&zck_tgt); - exit(1); - } - zckChunk *tgt_idx = zck_get_first_chunk(zck_tgt); - zckChunk *src_idx = zck_get_first_chunk(zck_src); - if(tgt_idx == NULL || src_idx == NULL) - exit(1); - - if(!zck_compare_chunk_digest(tgt_idx, src_idx)) - LOG_ERROR("WARNING: Dicts don't match\n"); - ssize_t dl_size = zck_get_header_length(zck_tgt); - if(dl_size < 0) - exit(1); - ssize_t total_size = zck_get_header_length(zck_tgt); - ssize_t matched_chunks = 0; - for(tgt_idx = zck_get_first_chunk(zck_tgt); tgt_idx; - tgt_idx = zck_get_next_chunk(tgt_idx)) { - bool found = false; - for(src_idx = zck_get_first_chunk(zck_src); src_idx; - src_idx = zck_get_next_chunk(src_idx)) { - if(zck_compare_chunk_digest(tgt_idx, src_idx)) { - found = true; - break; + exit(10); + } + if(!zck_init_adv_read(zck_tgt, dst_fd)) { + LOG_ERROR("%s", zck_get_error(zck_tgt)); + exit(10); + } + + zckDL *dl = zck_dl_init(zck_tgt); + if(dl == NULL) { + LOG_ERROR("%s", zck_get_error(NULL)); + zck_clear_error(NULL); + exit(10); + } + + int exit_val = 0; + + retval = dl_header(curl_ctx, dl, arguments.args[0], + arguments.fail_no_ranges, arguments.log_level); + if(!retval) { + exit_val = 10; + goto out; + } + + /* The server doesn't support ranges */ + if(retval == -1) { + if(arguments.fail_no_ranges) { + LOG_ERROR("Server doesn't support ranges and --fail-no-ranges was " + "set\n"); + exit_val = 2; + goto out; + } + /* Download the full file */ + lseek(dst_fd, 0, SEEK_SET); + if(ftruncate(dst_fd, 0) < 0) { + perror(NULL); + exit_val = 10; + goto out; + } + dlCtx dl_ctx = {0}; + dl_ctx.dl = dl; + dl_ctx.curl = curl_ctx; + dl_ctx.max_ranges = 0; + if(!dl_byte_range(&dl_ctx, arguments.args[0], -1, -1)) { + exit_val = 10; + goto out; + } + lseek(dst_fd, 0, SEEK_SET); + if(!zck_read_lead(zck_tgt) || !zck_read_header(zck_tgt)) { + exit_val = 10; + goto out; + } + } else { + /* If file is already fully downloaded, let's get out of here! */ + int retval = zck_find_valid_chunks(zck_tgt); + if(retval == 0) { + exit_val = 10; + goto out; + } + if(retval == 1) { + printf("Missing chunks: 0\n"); + printf("Downloaded %lu bytes\n", + (long unsigned)zck_dl_get_bytes_downloaded(dl)); + if(ftruncate(dst_fd, zck_get_length(zck_tgt)) < 0) { + perror(NULL); + exit_val = 10; + goto out; } + exit_val = 0; + goto out; } - if(!found) { - dl_size += zck_get_chunk_comp_size(tgt_idx); - } else { - matched_chunks += 1; + if(zck_src && !zck_copy_chunks(zck_src, zck_tgt)) { + exit_val = 10; + goto out; + } + zck_reset_failed_chunks(zck_tgt); + dlCtx dl_ctx = {0}; + dl_ctx.dl = dl; + dl_ctx.curl = curl_ctx; + dl_ctx.max_ranges = range_attempt[0]; + dl_ctx.fail_no_ranges = 1; + int ra_index = 0; + printf("Missing chunks: %i\n", zck_missing_chunks(zck_tgt)); + while(zck_missing_chunks(zck_tgt) > 0) { + dl_ctx.range_fail = 0; + zck_dl_reset(dl); + zckRange *range = zck_get_missing_range(zck_tgt, dl_ctx.max_ranges); + if(range == NULL || !zck_dl_set_range(dl, range)) { + exit_val = 10; + goto out; + } + while(range_attempt[ra_index] > 1 && + range_attempt[ra_index+1] > zck_get_range_count(range)) + ra_index++; + char *range_string = zck_get_range_char(zck_src, range); + if(range_string == NULL) { + exit_val = 10; + goto out; + } + int retval = dl_range(&dl_ctx, arguments.args[0], range_string, 1); + if(retval == -1) { + if(dl_ctx.max_ranges > 1) { + ra_index += 1; + dl_ctx.max_ranges = range_attempt[ra_index]; + } + LOG_ERROR("Tried downloading too many ranges, reducing to %i\n", + dl_ctx.max_ranges); + } + if(!zck_dl_set_range(dl, NULL)) { + exit_val = 10; + goto out; + } + zck_range_free(&range); + if(!retval) { + exit_val = 1; + goto out; + } + } + } + printf("Downloaded %lu bytes\n", + (long unsigned)zck_dl_get_bytes_downloaded(dl)); + if(ftruncate(dst_fd, zck_get_length(zck_tgt)) < 0) { + perror(NULL); + exit_val = 10; + goto out; + } + + switch(zck_validate_data_checksum(zck_tgt)) { + case -1: + exit_val = 1; + break; + case 0: + exit_val = 1; + break; + default: + break; + } +out: + if(exit_val > 0) { + if(zck_is_error(NULL)) { + LOG_ERROR("%s", zck_get_error(NULL)); + zck_clear_error(NULL); } - total_size += zck_get_chunk_comp_size(tgt_idx); + if(zck_is_error(zck_src)) + LOG_ERROR("%s", zck_get_error(zck_src)); + if(zck_is_error(zck_tgt)) + LOG_ERROR("%s", zck_get_error(zck_tgt)); } - printf("Would download %li of %li bytes\n", (long)dl_size, - (long)total_size); - printf("Matched %li of %lu chunks\n", (long)matched_chunks, - (long unsigned)zck_get_chunk_count(zck_tgt)); + zck_dl_free(&dl); zck_free(&zck_tgt); zck_free(&zck_src); + curl_easy_cleanup(curl_ctx); + curl_global_cleanup(); + exit(exit_val); } \ No newline at end of file diff --git a/src/zck_read_header.c b/src/zck_read_header.c index 7f69fc4..c439205 100644 --- a/src/zck_read_header.c +++ b/src/zck_read_header.c @@ -180,9 +180,9 @@ int main (int argc, char *argv[]) { if (!digest_uncompressed) digest_uncompressed = ""; - if (chk == zck_get_first_chunk(zck)) { - bool has_uncompressed = (strlen(digest_uncompressed) > 0); - if (has_uncompressed) + if (chk == zck_get_first_chunk(zck)) { + bool has_uncompressed = (strlen(digest_uncompressed) > 0); + if (has_uncompressed) printf(" Chunk Checksum %*cChecksum uncompressed %*c Start Comp size Size\n", (((int)zck_get_chunk_digest_size(zck) * 2) - (int)strlen("Checksum")), ' ', ((int)zck_get_chunk_digest_size(zck) * 2) - (int)strlen("Uncompressed Checksum"), ' '); @@ -190,7 +190,7 @@ int main (int argc, char *argv[]) { printf(" Chunk Checksum %*c Start Comp size Size\n", (((int)zck_get_chunk_digest_size(zck) * 2) - (int)strlen("Checksum")), ' '); - } + } printf("%12lu %s %s %12lu %12lu %12lu", (long unsigned)zck_get_chunk_number(chk), digest, @@ -216,4 +216,4 @@ int main (int argc, char *argv[]) { } zck_free(&zck); return 1-valid_cks; -} \ No newline at end of file +} diff --git a/test/copy_chunks.c b/test/copy_chunks.c index 4b353f6..1756b5a 100644 --- a/test/copy_chunks.c +++ b/test/copy_chunks.c @@ -125,4 +125,4 @@ int main (int argc, char *argv[]) { zck_free(&src_zck); free(path); return 0; -} \ No newline at end of file +} diff --git a/test/empty.c b/test/empty.c index 28dbac2..3b67940 100644 --- a/test/empty.c +++ b/test/empty.c @@ -108,4 +108,4 @@ int main (int argc, char *argv[]) { zck_free(&zck); free(data); return 0; -} \ No newline at end of file +} diff --git a/test/shacheck.c b/test/shacheck.c index 9e7ae88..48f922f 100644 --- a/test/shacheck.c +++ b/test/shacheck.c @@ -125,4 +125,4 @@ int main (int argc, char *argv[]) { free(args); free(data); return 0; -} \ No newline at end of file +} diff --git a/test/zck_cmp_uncomp.c b/test/zck_cmp_uncomp.c index 2858a0e..bdf9205 100644 --- a/test/zck_cmp_uncomp.c +++ b/test/zck_cmp_uncomp.c @@ -240,4 +240,4 @@ int main (int argc, char *argv[]) { zck_free(&zckSrc); zck_free(&zckDst); close(dst_fd); -} \ No newline at end of file +} -- 2.30.2