/* Get data digest */
char ZCK_PUBLIC_API *zck_get_data_digest(zckCtx *zck)
ZCK_WARN_UNUSED;
+/* Check whether this context refers to a detached header */
+bool ZCK_PUBLIC_API zck_is_detached_header(zckCtx *zck)
+ ZCK_WARN_UNUSED;
/*******************************************************************
for(zckChunk *idx = zck->index.first; idx; idx = idx->next) {
if(idx == zck->index.first && idx->length == 0) {
idx->valid = 1;
+ if(zck->header_only)
+ break;
continue;
}
idx->valid = valid_chunk;
if(all_good && valid_chunk != 1)
all_good = false;
+ if(zck->header_only)
+ break;
}
int valid_file = -1;
- if(all_good) {
- /* Check data checksum */
- valid_file = validate_file(zck, bad_checksums);
- if(!valid_file)
- return 0;
+ if(zck->has_uncompressed_source || zck->header_only) {
+ /* If we have an uncompressed source or are a detached header, skip
+ * the meaningless full data checksum and just set valid_file if the
+ * chunks (or the dictionary, if we're a detached header) were good */
+ if(all_good)
+ valid_file = 1;
+ } else {
+ if(all_good) {
+ /* Check data checksum */
+ valid_file = validate_file(zck, bad_checksums);
+ if(!valid_file)
+ return 0;
- /* If data checksum failed, invalidate *all* chunks */
- if(valid_file == -1)
- for(zckChunk *idx = zck->index.first; idx; idx = idx->next)
- idx->valid = -1;
+ /* If data checksum failed, invalidate *all* chunks */
+ if(valid_file == -1)
+ for(zckChunk *idx = zck->index.first; idx; idx = idx->next)
+ idx->valid = -1;
+ }
}
/* Go back to beginning of data section */
return 1;
}
-/* Returns 1 if data hash matches, -1 if it doesn't and 0 if error */
+/* Returns 1 if data hash matches, -1 if it doesn't and 0 if error
+ *
+ * For a zchunk file with both compressed and uncompressed checksums, validate
+ * each chunk checksum independently, since there is no data hash */
int ZCK_PUBLIC_API zck_validate_data_checksum(zckCtx *zck) {
VALIDATE_READ_BOOL(zck);
if(zck->has_uncompressed_source) {
- zck_log(
- ZCK_LOG_DEBUG,
- "Skipping full file validation since uncompressed source flag is set"
- );
- return 1;
+ return validate_checksums(zck, ZCK_LOG_WARNING);
}
if(!seek_data(zck, zck->data_offset, SEEK_SET))
}
static bool read_header_from_file(zckCtx *zck) {
+ /* Both lead_size and header_length must be set before the header can
+ * be read; a zero in either one most likely means the lead has not
+ * been read yet */
+ if(zck->lead_size == 0 || zck->header_length == 0) {
+ set_error(zck, "Lead size or header length is 0. Have you run zck_read_lead() yet?");
+ return false;
+ }
+
/* Allocate header and store any extra bytes at beginning of header */
zck->header = zrealloc(zck->header, zck->lead_size + zck->header_length);
if (!zck->header) {
if(!hash_init(zck, &(zck->check_full_hash), &(zck->hash_type)))
return false;
- if(!hash_update(zck, &(zck->check_full_hash), zck->header,
- zck->hdr_digest_loc))
+ /* If we're reading a detached zchunk header, first five bytes will be
+ * different, breaking the header digest, so let's make things simple
+ * by forcing the first five bytes to be static */
+ if(!hash_update(zck, &(zck->check_full_hash), "\0ZCK1", 5))
return false;
+ /* Now hash the remaining lead */
+ if(!hash_update(zck, &(zck->check_full_hash), zck->header+5,
+ zck->hdr_digest_loc-5))
+ return false;
+ /* And the remaining header */
if(!hash_update(zck, &(zck->check_full_hash), header, zck->header_length))
return false;
int ret = validate_header(zck);
return false;
}
- if(memcmp(header, "\0ZCK1", 5) != 0) {
+ if(memcmp(header, "\0ZHR1", 5) == 0) {
+ zck->header_only = true;
+ } else if(memcmp(header, "\0ZCK1", 5) != 0) {
free(header);
set_error(zck, "Invalid lead, perhaps this is not a zck file?");
return false;
VALIDATE_INT(zck);
return get_flags(zck);
}
+
+bool ZCK_PUBLIC_API zck_is_detached_header(zckCtx *zck) { /* reports the context's header_only flag */
+ VALIDATE_BOOL(zck); /* context sanity check; macro is defined outside this view */
+ return zck->header_only; /* set to true when the lead starts with the "\0ZHR1" ID */
+}
static int log_fd = STDERR_FILENO;
#endif
-static logcallback callback = NULL;
+static logcallback callback = NULL;
void ZCK_PUBLIC_API zck_set_log_level(zck_log_type ll) {
log_level = ll;
size_t data_offset;
size_t header_length;
+ bool header_only;
char *header;
size_t header_size;
size_t hdr_digest_loc;
char *sig_string;
size_t sig_size;
-
char *prep_digest;
int prep_hash_type;
ssize_t prep_hdr_size;
{"verbose", 'v', 0, 0,
"Increase verbosity (can be specified more than once for debugging)"},
{"stdout", 'c', 0, 0, "Direct output to stdout"},
- {"dict", 1000, 0, 0, "Only extract the dictionary"},
+ {"dict", 1000, 0, 0, "Only extract the dictionary (can't be run with --header)"},
+ {"header", 1001, 0, 0, "Only extract the header (can't be run with --dict)"},
{"version", 'V', 0, 0, "Show program version"},
{ 0 }
};
char *args[1];
zck_log_type log_level;
bool dict;
+ bool header;
bool std_out;
bool exit;
};
version();
arguments->exit = true;
break;
- case 1000:
+ case 1000: // Header and dict can't both be set
arguments->dict = true;
+ arguments->header = false;
+ break;
+ case 1001: // Header and dict can't both be set
+ arguments->header = true;
+ arguments->dict = false;
break;
case ARGP_KEY_ARG:
if (state->arg_num >= 1) {
if(!arguments.std_out) {
if(strlen(arguments.args[0]) < 5 ||
- strcmp(arguments.args[0] + strlen(arguments.args[0]) - 4, ".zck") != 0) {
- LOG_ERROR("Not a *.zck file: %s\n", arguments.args[0]);
+ (strcmp(arguments.args[0] + strlen(arguments.args[0]) - 4, ".zck") != 0 &&
+ strcmp(arguments.args[0] + strlen(arguments.args[0]) - 4, ".zhr") != 0)) {
+ LOG_ERROR("Not a *.zck or *.zhr file: %s\n", arguments.args[0]);
exit(1);
}
}
char *out_name = NULL;
if(arguments.dict)
out_name = calloc(strlen(base_name) + 3, 1); // len .zck -> .zdict = +2
+ else if(arguments.header)
+ /* ".zck" -> ".zhr" keeps the same length, but the buffer also needs
+ * room for the terminating NUL that snprintf() writes after ".zhr" */
+ out_name = calloc(strlen(base_name) + 1, 1);
else
- out_name = calloc(strlen(base_name) - 2, 1);
+ out_name = calloc(strlen(base_name) - 2, 1); // strip .zck
assert(out_name);
snprintf(out_name, strlen(base_name) - 3, "%s", base_name); //Strip off .zck
if(arguments.dict)
snprintf(out_name + strlen(base_name) - 4, 7, ".zdict");
+ else if(arguments.header)
+ snprintf(out_name + strlen(base_name) - 4, 5, ".zhr");
#ifdef _WIN32
int dst_fd = _fileno(stdout);
if(dict_size < 0) {
LOG_ERROR("%s", zck_get_error(zck));
goto error2;
+ } else if(dict_size == 0) {
+ LOG_ERROR("%s doesn't contain a dictionary\n", arguments.args[0]);
+ goto error2;
}
data = calloc(dict_size, 1);
assert(data);
}
good_exit = true;
goto error2;
+ } else if(arguments.header) {
+ if(zck_is_detached_header(zck)) {
+ LOG_ERROR("%s is already a detached header\n", arguments.args[0]);
+ goto error2;
+ }
+
+ ssize_t header_size = zck_get_header_length(zck);
+ if(header_size == -1) {
+ LOG_ERROR("%s", zck_get_error(zck));
+ goto error2;
+ }
+
+ zckChunk *dict = zck_get_first_chunk(zck);
+ ssize_t dict_size = zck_get_chunk_comp_size(dict);
+ if(dict_size < 0) {
+ LOG_ERROR("%s", zck_get_error(zck));
+ goto error2;
+ }
+
+ data = calloc(BUF_SIZE, 1);
+ if(data == NULL) {
+ LOG_ERROR("Unable to allocate %i bytes\n", BUF_SIZE);
+ goto error2;
+ }
+
+ if(lseek(src_fd, 5, SEEK_SET) < 0) {
+ perror("Unable to seek to beginning of source file");
+ exit(1);
+ }
+ /* Emit the detached-header ID in place of the first five bytes of the
+ * source; a failed or short write must not be silently ignored */
+ if(write(dst_fd, "\0ZHR1", 5) != 5) {
+ LOG_ERROR("Error writing to %s\n", out_name);
+ goto error2;
+ }
+ for(ssize_t i=5; i<header_size + dict_size; i+=BUF_SIZE) {
+ ssize_t write_size = i + BUF_SIZE < header_size + dict_size ? BUF_SIZE : header_size + dict_size - i;
+ ssize_t read_size = read(src_fd, data, write_size);
+ if(read_size < write_size) {
+ LOG_ERROR("Unable to read %llu bytes from source\n", (long long unsigned) write_size);
+ goto error2;
+ }
+ if(write(dst_fd, data, write_size) != write_size) {
+ LOG_ERROR("Error writing to %s\n", out_name);
+ goto error2;
+ }
+ }
+ good_exit = true;
+ goto error2;
}
+
+ if(zck_is_detached_header(zck)) {
+ LOG_ERROR("%s is a detached header, not a full zchunk file. The only operation unzck\n"
+ "can run on a detached header is --dict\n", arguments.args[0]);
+ goto error2;
+ }
+
int ret = zck_validate_data_checksum(zck);
if(ret < 1) {
if(ret == -1)
assert(data);
size_t total = 0;
while(true) {
- ssize_t read = zck_read(zck, data, BUF_SIZE);
- if(read < 0) {
+ ssize_t read_size = zck_read(zck, data, BUF_SIZE);
+ if(read_size < 0) {
LOG_ERROR("%s", zck_get_error(zck));
goto error2;
}
- if(read == 0)
+ if(read_size == 0)
break;
- if(write(dst_fd, data, read) != read) {
+ if(write(dst_fd, data, read_size) != read_size) {
LOG_ERROR("Error writing to %s\n", out_name);
goto error2;
}
- total += read;
+ total += read_size;
}
if(!zck_close(zck)) {
LOG_ERROR("%s", zck_get_error(zck));
LOG_ERROR("%s", zck_get_error(zck));
exit(1);
}
+ if(zck_is_detached_header(zck))
+ printf("zchunk detached header\n\n");
+ else
+ printf("zchunk file\n\n");
printf("Overall checksum type: %s\n",
zck_hash_name_from_type(zck_get_full_hash_type(zck)));
printf("Header size: %llu\n", (long long unsigned) zck_get_header_length(zck));
if(arguments.verify) {
if(zck_get_chunk_valid(chk) == 1)
printf(" +");
+ else if(zck_is_detached_header(zck) && zck_get_chunk_valid(chk) == 0)
+ ;
else
printf(" !");
}
ID
'\0ZCK1', identifies file as zchunk version 1 file
+ OR
+ '\0ZHR1', identifies file as zchunk detached header version 1 file
Checksum type
This is an integer containing the type of checksum used to generate the header
Header checksum
This is the checksum of everything from the beginning of the file until the end
- of the signatures, ignoring the header checksum.
+ of the signatures, ignoring the header checksum. For detached headers,
+ libraries should use '\0ZCK1' for the ID when calculating the checksum so that
+ it matches the header checksum of the full zchunk file.
The preface:
+=================+==========================+==================+
(Dict stream will only exist if flag 0 is set to 1)
-+======================+===============+==================+
-| Dict stream (ci) [0] | Dict checksum | Dict length (ci) |
-+======================+===============+==================+
++======================+===============+================================+
+| Dict stream (ci) [0] | Dict checksum | Uncompressed dict checksum [2] |
++======================+===============+================================+
-+===============================+
-| Uncompressed dict length (ci) |
-+===============================+
++==================+===============================+
+| Dict length (ci) | Uncompressed dict length (ci) |
++==================+===============================+
[+=======================+================+=================================+
[| Chunk stream (ci) [0] | Chunk checksum | Uncompressed chunk checksum [2] |
This is the checksum of the compressed dict, used to detect whether two dicts
are identical. If there is no dict, the checksum must be all zeros.
+NOTE: The uncompressed dict checksum only exists if flag 2 is set to 1.
+Uncompressed dict checksum
+ This is the checksum of the uncompressed dictionary. It serves no practical
+ purpose, because an uncompressed source does not contain a dictionary.
+
Dict length
This is an integer containing the length of the dict. If there is no dict,
this must be a zero.
This is an integer containing the size of the signature.
Signature
- The actual signature. The signature MUST only apply to the header, excluding
- the header size, the header checksum, the signature count and the signatures.
+ The actual signature. The signature MUST only apply to the header, excluding:
+ * The header size
+ * The header checksum
+ * The signature count
+ * The signatures
The excluded data MUST be omitted when calculating the signature.
Signatures are designed so that anyone can add a new signature to a file