Add functions to extract a single chunk
authorJonathan Dieter <jdieter@gmail.com>
Sat, 1 Dec 2018 22:19:12 +0000 (22:19 +0000)
committerJonathan Dieter <jdieter@gmail.com>
Sat, 1 Dec 2018 22:19:12 +0000 (22:19 +0000)
Signed-off-by: Jonathan Dieter <jdieter@gmail.com>
include/zck.h.in
src/lib/comp/comp.c
src/lib/dl/dl.c
src/lib/hash/hash.c
src/lib/index/index_create.c
src/lib/index/index_read.c
src/lib/zck_private.h
test/meson.build
test/read_single_chunk.c [new file with mode: 0644]
test/read_single_comp_chunk.c [new file with mode: 0644]

index 034b7e668712b60be25c35d927d77d8d6dd08be2..065dba188bbbe7487559b0c952f26a47010538b6 100644 (file)
@@ -169,6 +169,7 @@ int zck_failed_chunks(zckCtx *zck)
 /* Reset failed chunks to become missing */
 void zck_reset_failed_chunks(zckCtx *zck);
 
+
 /*******************************************************************
  * The functions should be all you need to read and write a zchunk
  * file.  After this point are advanced functions with an unstable
@@ -252,10 +253,20 @@ int zck_get_chunk_valid(zckChunk *idx)
 /* Get chunk digest */
 char *zck_get_chunk_digest(zckChunk *item)
     __attribute__ ((warn_unused_result));
+/* Get digest size of chunk hash type */
+ssize_t zck_get_chunk_digest_size(zckCtx *zck)
+    __attribute__ ((warn_unused_result));
+/* Read the decompressed data of chunk idx into dst (at most dst_size bytes) */
+ssize_t zck_get_chunk_data(zckChunk *idx, char *dst, size_t dst_size)
+    __attribute__ ((warn_unused_result));
+/* Read the raw compressed data of chunk idx into dst (at most dst_size bytes) */
+ssize_t zck_get_chunk_comp_data(zckChunk *idx, char *dst, size_t dst_size)
+    __attribute__ ((warn_unused_result));
 /* Find out if two chunk digests are the same */
 bool zck_compare_chunk_digest(zckChunk *a, zckChunk *b)
     __attribute__ ((warn_unused_result));
 
+
 /*******************************************************************
  * Advanced hash functions
  *******************************************************************/
@@ -268,9 +279,6 @@ ssize_t zck_get_full_digest_size(zckCtx *zck)
 /* Get chunk hash type */
 int zck_get_chunk_hash_type(zckCtx *zck)
     __attribute__ ((warn_unused_result));
-/* Get digest size of chunk hash type */
-ssize_t zck_get_chunk_digest_size(zckCtx *zck)
-    __attribute__ ((warn_unused_result));
 /* Get name of hash type */
 const char *zck_hash_name_from_type(int hash_type)
     __attribute__ ((warn_unused_result));
index c3bd09a65955ccdc44d858ff1076fa188242965d..1a3ea55c082d4ad29427eda4cf6d8fb2511c8ba7 100644 (file)
@@ -270,10 +270,9 @@ bool comp_reset(zckCtx *zck) {
     return zck->comp.close(zck, &(zck->comp));
 }
 
-bool comp_close(zckCtx *zck) {
+bool comp_reset_comp_data(zckCtx *zck) {
     ALLOCD_BOOL(zck, zck);
 
-    zck_log(ZCK_LOG_DEBUG, "Closing compression");
     if(zck->comp.data) {
         free(zck->comp.data);
         zck->comp.data = NULL;
@@ -281,6 +280,14 @@ bool comp_close(zckCtx *zck) {
         zck->comp.data_loc = 0;
         zck->comp.data_idx = NULL;
     }
+    return true;
+}
+
+bool comp_close(zckCtx *zck) {
+    ALLOCD_BOOL(zck, zck);
+
+    zck_log(ZCK_LOG_DEBUG, "Closing compression");
+    comp_reset_comp_data(zck);
     if(zck->comp.dict)
         free(zck->comp.dict);
     zck->comp.dict = NULL;
@@ -415,8 +422,8 @@ ssize_t comp_read(zckCtx *zck, char *dst, size_t dst_size, bool use_dict) {
         return 0;
 
     /* Read dictionary if it exists and hasn't been read yet */
-    if(use_dict && !zck->comp.data_eof && zck->comp.data_idx == NULL &&
-       zck->index.first->length > 0 && !import_dict(zck))
+    if(use_dict && zck->index.first->length > 0 && zck->comp.dict == NULL &&
+       !import_dict(zck))
         return -1;
 
     size_t dc = 0;
@@ -630,6 +637,78 @@ ssize_t PUBLIC zck_end_chunk(zckCtx *zck) {
 
+/* Public read entry point: after checking the context and dst, hand the
+ * request to comp_read() with dictionary use enabled (use_dict = 1) and
+ * return its result unchanged.
+ * NOTE(review): VALIDATE_READ_INT/ALLOCD_INT are macros defined elsewhere;
+ * presumably they return an error value from this function - confirm. */
 ssize_t PUBLIC zck_read(zckCtx *zck, char *dst, size_t dst_size) {
     VALIDATE_READ_INT(zck);
+    ALLOCD_INT(zck, dst);
+
+    return comp_read(zck, dst, dst_size, 1);
+}
+
+/* Copy the still-compressed bytes of chunk idx into dst.
+ *
+ * At most dst_size bytes are requested, and never more than the chunk's own
+ * compressed size, so an oversized buffer cannot pick up bytes belonging to
+ * the chunk that follows.  Returns read_data()'s result for the read, 0 for
+ * an empty chunk, or -1 when a size lookup or the seek fails.
+ * NOTE(review): VALIDATE_INT/ALLOCD_INT are macros defined elsewhere;
+ * presumably they return an error value from this function - confirm. */
+ssize_t PUBLIC zck_get_chunk_comp_data(zckChunk *idx, char *dst,
+                                       size_t dst_size) {
+    zckCtx *zck = NULL;
+    if(idx && idx->zck) {
+        VALIDATE_INT(idx->zck);
+        zck = idx->zck;
+    }
+    ALLOCD_INT(zck, idx);
+    ALLOCD_INT(zck, dst);
+
+    /* Make sure both the uncompressed and compressed sizes are valid */
+    ssize_t comp_size = zck_get_chunk_comp_size(idx);
+    if(zck_get_chunk_size(idx) < 0 || comp_size < 0)
+        return -1;
+    /* If the chunk is empty, we're done */
+    if(zck_get_chunk_size(idx) == 0 || comp_size == 0)
+        return 0;
+
+    /* Never read past the end of this chunk into the next one */
+    if(dst_size > (size_t)comp_size)
+        dst_size = (size_t)comp_size;
+
+    /* Seek to beginning of requested chunk */
+    if(!seek_data(zck, zck_get_chunk_start(idx), SEEK_SET))
+        return -1;
+
+    /* Return read chunk */
+    return read_data(zck, dst, dst_size);
+}
+
+/* Read the data of a single chunk (idx) into dst via comp_read().
+ *
+ * Steps, in order: validate the context and arguments, return early for an
+ * invalid (-1) or empty (0) chunk, import the dictionary from the first
+ * chunk if one exists and has not been loaded yet, reset the compression
+ * state, seek to the start of idx and finally call comp_read() with
+ * dictionary use enabled.  Every failing step returns -1.
+ *
+ * The call repositions the file (seek_data) and overwrites
+ * zck->comp.data_idx, so it changes the state of the shared context.
+ * NOTE(review): dst_size is forwarded to comp_read() unchanged; whether
+ * comp_read() stops at the end of this chunk is not visible here - confirm.
+ * NOTE(review): VALIDATE_INT/ALLOCD_INT are macros defined elsewhere;
+ * presumably they return an error value from this function - confirm. */
+ssize_t PUBLIC zck_get_chunk_data(zckChunk *idx, char *dst,
+                                  size_t dst_size) {
+    zckCtx *zck = NULL;
+    if(idx && idx->zck) {
+        VALIDATE_INT(idx->zck);
+        zck = idx->zck;
+    }
+    ALLOCD_INT(zck, idx);
+    ALLOCD_INT(zck, dst);
+
+    /* Make sure chunk size is valid */
+    if(zck_get_chunk_size(idx) < 0)
+        return -1;
+    /* If the chunk is empty, we're done */
+    if(zck_get_chunk_size(idx) == 0)
+        return 0;
+
+    /* Read dictionary if needed: the first chunk is treated as the
+     * dictionary, and it is only imported when it is non-empty and
+     * zck->comp.dict has not been set yet */
+    zckChunk *dict = zck_get_first_chunk(zck);
+    if(dict == NULL)
+        return -1;
+    if(zck_get_chunk_size(dict) > 0 && zck->comp.dict == NULL) {
+        if(!seek_data(zck, zck_get_chunk_start(dict), SEEK_SET))
+            return -1;
+        if(!comp_reset(zck))
+            return -1;
+        if(!comp_init(zck))
+            return -1;
+        if(!import_dict(zck))
+            return -1;
+    }
+
+    /* Drop any buffered compressed data, restart the compressor, then seek
+     * to beginning of requested chunk */
+    if(!comp_reset_comp_data(zck))
+        return -1;
+    if(!comp_reset(zck))
+        return -1;
+    if(!comp_init(zck))
+        return -1;
+    if(!seek_data(zck, zck_get_chunk_start(idx), SEEK_SET))
+        return -1;
+    /* Record idx as the chunk the reader is currently positioned on */
+    zck->comp.data_idx = idx;
 
     return comp_read(zck, dst, dst_size, 1);
 }
index 901bac2d3540857e3204c48649dee00cb635803d..ce50caabed6ccf6130641bf1aebac417d9b29af7 100644 (file)
@@ -80,8 +80,7 @@ static bool set_chunk_valid(zckDL *dl) {
     ALLOCD_BOOL(NULL, dl);
     VALIDATE_BOOL(dl->zck);
 
-    int retval = validate_chunk(dl->zck, dl->tgt_check, ZCK_LOG_WARNING,
-                                dl->tgt_number);
+    int retval = validate_chunk(dl->tgt_check, ZCK_LOG_WARNING);
     if(retval < 1) {
         if(!zero_chunk(dl->zck, dl->tgt_check))
             return false;
index 69d543443122c53ea77e938015dba5dad2b20cc9..d2b00417dcbeabb0c44758b18a0975a215665ba2 100644 (file)
@@ -90,8 +90,7 @@ static int validate_checksums(zckCtx *zck, zck_log_type bad_checksums) {
 
     /* Check each chunk checksum */
     bool all_good = true;
-    int count = 0;
-    for(zckChunk *idx = zck->index.first; idx; idx = idx->next, count++) {
+    for(zckChunk *idx = zck->index.first; idx; idx = idx->next) {
         if(idx == zck->index.first && idx->length == 0) {
             idx->valid = 1;
             continue;
@@ -113,7 +112,7 @@ static int validate_checksums(zckCtx *zck, zck_log_type bad_checksums) {
                 return 0;
             rlen += rsize;
         }
-        int valid_chunk = validate_chunk(zck, idx, bad_checksums, count);
+        int valid_chunk = validate_chunk(idx, bad_checksums);
         if(!valid_chunk)
             return 0;
         idx->valid = valid_chunk;
@@ -333,17 +332,18 @@ bool set_chunk_hash_type(zckCtx *zck, int hash_type) {
 }
 
 /* Validate chunk, returning -1 if checksum fails, 1 if good, 0 if error */
-int validate_chunk(zckCtx *zck, zckChunk *idx,
-                       zck_log_type bad_checksum, int chunk_number) {
-    VALIDATE_BOOL(zck);
-    if(idx == NULL) {
-        set_error(zck, "Index not initialized");
-        return 0;
+int validate_chunk(zckChunk *idx, zck_log_type bad_checksum) {
+    zckCtx *zck = NULL;
+    if(idx && idx->zck) {
+        VALIDATE_INT(idx->zck);
+        zck = idx->zck;
     }
+    ALLOCD_INT(zck, idx);
 
     char *digest = hash_finalize(zck, &(zck->check_chunk_hash));
     if(digest == NULL) {
         set_error(zck, "Unable to calculate chunk checksum");
+        idx->valid = 0;
         return 0;
     }
     if(idx->comp_length == 0)
@@ -356,25 +356,27 @@ int validate_chunk(zckCtx *zck, zckChunk *idx,
     free(pdigest);
     if(memcmp(digest, idx->digest, idx->digest_size) != 0) {
         free(digest);
-        if(chunk_number == -1)
+        if(idx->number == -1)
             zck_log(bad_checksum, "Chunk checksum: FAILED!");
         else
             zck_log(bad_checksum, "Chunk %i's checksum: FAILED",
-                    chunk_number);
+                    idx->number);
+        idx->valid = -1;
         return -1;
     }
-    if(chunk_number == -1)
+    if(idx->number == -1)
         zck_log(ZCK_LOG_DEBUG, "Chunk checksum: valid");
     else
-        zck_log(ZCK_LOG_DEBUG, "Chunk %i's checksum: valid", chunk_number);
+        zck_log(ZCK_LOG_DEBUG, "Chunk %i's checksum: valid", idx->number);
     free(digest);
+    idx->valid = 1;
     return 1;
 }
 
+/* Validate the chunk referenced by zck->comp.data_idx, logging a checksum
+ * failure at ZCK_LOG_ERROR, and return validate_chunk()'s result (1 if good,
+ * -1 if the checksum fails, 0 on error).  The chunk number used in the log
+ * message is now taken from the chunk itself instead of being passed as -1.
+ * NOTE(review): VALIDATE_BOOL is used in an int-returning function;
+ * presumably it returns false (0), matching the error value - confirm. */
 int validate_current_chunk(zckCtx *zck) {
     VALIDATE_BOOL(zck);
 
-    return validate_chunk(zck, zck->comp.data_idx, ZCK_LOG_ERROR, -1);
+    return validate_chunk(zck->comp.data_idx, ZCK_LOG_ERROR);
 }
 
 int validate_file(zckCtx *zck, zck_log_type bad_checksums) {
index 49a4610e7bf26d2bf505cdd96233928ba16a459f..b5a372ee183a90c31ceb2f44478c6aacadc18f03 100644 (file)
@@ -56,6 +56,7 @@ static bool finish_chunk(zckIndex *index, zckChunk *item, char *digest,
     item->start = index->length;
     item->valid = valid;
     item->zck = zck;
+    item->number = index->count;
     if(index->first == NULL) {
         index->first = item;
     } else {
index a8b72726a48b94be98ca50f28ec3267ed6a2266b..5f8c7cb144756405a238f8dbc6474cb8fb4fb52d 100644 (file)
@@ -96,6 +96,7 @@ bool index_read(zckCtx *zck, char *data, size_t size, size_t max_length) {
         new->length = chunk_length;
         new->zck = zck;
         new->valid = 0;
+        new->number = count;
         idx_loc += new->comp_length;
         count++;
         zck->index.length = idx_loc;
index 0e4cbc5e7843bf46816cc41f039a4dab16bf651b..89ff3bedc0203b5796af7c5fbddd6d3857370d6f 100644 (file)
@@ -153,6 +153,7 @@ typedef struct zckChunk {
     char *digest;
     int digest_size;
     int valid;
+    size_t number;
     size_t start;
     size_t comp_length;
     size_t length;
@@ -312,8 +313,7 @@ char *hash_finalize(zckCtx *zck, zckHash *hash)
     __attribute__ ((warn_unused_result));
 void hash_close(zckHash *hash);
 void hash_reset(zckHashType *ht);
-int validate_chunk(zckCtx *zck, zckChunk *idx, zck_log_type bad_checksum,
-                   int chunk_number)
+int validate_chunk(zckChunk *idx, zck_log_type bad_checksum)
     __attribute__ ((warn_unused_result));
 int validate_file(zckCtx *zck, zck_log_type bad_checksums)
     __attribute__ ((warn_unused_result));
index eeb654c6976252bbea289d1d8956e2d203916e37..8f6e267c0397633f81b70e7b58cfa4b88c1408e5 100644 (file)
@@ -11,6 +11,14 @@ invalid_input_checksum = executable('invalid_input_checksum',
                                     ['invalid_input_checksum.c'] + util_sources,
                                     include_directories: incdir,
                                     dependencies: [zstd_dep, openssl_dep])
+read_single_chunk = executable('read_single_chunk',
+                               ['read_single_chunk.c'] + util_sources,
+                               include_directories: incdir,
+                               dependencies: [zstd_dep, openssl_dep])
+read_single_comp_chunk = executable('read_single_comp_chunk',
+                                    ['read_single_comp_chunk.c'] + util_sources,
+                                    include_directories: incdir,
+                                    dependencies: [zstd_dep, openssl_dep])
 shacheck = executable('shacheck', ['shacheck.c'] + util_sources, include_directories: incdir, dependencies: [zstd_dep, openssl_dep])
 file_path = join_paths(meson.source_root(), 'test/files')
 
@@ -71,6 +79,22 @@ test(
     ]
 )
 
+test(
+    'read single chunk',
+    read_single_chunk,
+    args: [
+        join_paths(file_path, 'LICENSE.dict.fodt.zck')
+    ]
+)
+
+test(
+    'read single compressed chunk',
+    read_single_comp_chunk,
+    args: [
+        join_paths(file_path, 'LICENSE.dict.fodt.zck')
+    ]
+)
+
 test(
     'check verbosity in unzck',
     unzck,
@@ -166,6 +190,7 @@ test(
     ]
 )
 
+
 check_sha = '430c1963f71efe663272d39f7a7941d4a4e78d43c20caba8876a12f6a18eaeb1'
 if zstd_dep.found() and zstd_dep.version().version_compare('<=1.3.5')
     check_sha = '0418aaca75b6b64c3ac9bc50fc0974e48c76691869977907fad25eea834f3c85'
diff --git a/test/read_single_chunk.c b/test/read_single_chunk.c
new file mode 100644 (file)
index 0000000..d2e85a9
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2018 Jonathan Dieter <jdieter@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright notice,
+ *     this list of conditions and the following disclaimer in the documentation
+ *     and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <zck.h>
+#include "zck_private.h"
+#include "util.h"
+
+char *echecksum =
+    "31367eeea6aa48903f2b167149c468d85c4a5e0262b8b52a605e12abb174a18b";
+
+int main (int argc, char *argv[]) {
+    /* Decompress the second chunk of the zchunk file named by argv[1] and
+     * compare its SHA-256 with echecksum; exit 0 on match, 1 otherwise */
+    int ret = 1;
+    char *data = NULL, *cksum = NULL;
+    zckChunk *chunk = NULL;
+    ssize_t chunk_size = 0, read_size = 0;
+    if(argc < 2) {
+        fputs("Usage: read_single_chunk <zchunk file>\n", stderr);
+        exit(1);
+    }
+    int in = open(argv[1], O_RDONLY);
+    if(in < 0) {
+        perror("Unable to open LICENSE.dict.fodt.zck for reading");
+        exit(1);
+    }
+    zckCtx *zck = zck_create();
+    if(zck == NULL)
+        exit(1);
+    if(!zck_init_read(zck, in))
+        goto zck_error;
+    chunk = zck_get_first_chunk(zck);
+    if(chunk != NULL)
+        chunk = zck_get_next_chunk(chunk);
+    if(chunk == NULL)
+        goto zck_error;
+    chunk_size = zck_get_chunk_size(chunk);
+    if(chunk_size < 0)
+        goto zck_error;
+    /* Request at least one byte so NULL can only mean allocation failure */
+    data = calloc(chunk_size > 0 ? (size_t)chunk_size : 1, 1);
+    if(data == NULL) {
+        perror("Unable to allocate chunk buffer");
+        goto out;
+    }
+    read_size = zck_get_chunk_data(chunk, data, chunk_size);
+    if(read_size < 0)
+        goto zck_error;
+    if(read_size != chunk_size) {
+        printf("chunk size didn't match expected size: %zd != %zd\n",
+               read_size, chunk_size);
+        goto out;
+    }
+    cksum = get_hash(data, chunk_size, ZCK_HASH_SHA256);
+    if(cksum == NULL)
+        goto out;
+    printf("Calculated checksum: (SHA-256)%s\n", cksum);
+    printf("Expected checksum: (SHA-256)%s\n", echecksum);
+    ret = (strcmp(cksum, echecksum) != 0);
+    if(ret)
+        printf("Checksums don't match!\n");
+    goto out;
+zck_error:
+    printf("%s", zck_get_error(zck));
+out:
+    free(data);
+    free(cksum);
+    zck_free(&zck);
+    return ret;
+}
diff --git a/test/read_single_comp_chunk.c b/test/read_single_comp_chunk.c
new file mode 100644 (file)
index 0000000..82c94ed
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2018 Jonathan Dieter <jdieter@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright notice,
+ *     this list of conditions and the following disclaimer in the documentation
+ *     and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <zck.h>
+#include "zck_private.h"
+#include "util.h"
+
+char *echecksum =
+    "0a1a0a63193752ceb4fcf6b5340011d7083f224fd0da1dd91563e0331129786a";
+
+int main (int argc, char *argv[]) {
+    /* Read the still-compressed second chunk of the zchunk file named by
+     * argv[1] and compare its SHA-256 with echecksum; exit 0 on match */
+    int ret = 1;
+    char *data = NULL, *cksum = NULL;
+    zckChunk *chunk = NULL;
+    ssize_t chunk_size = 0, read_size = 0;
+    if(argc < 2) {
+        fputs("Usage: read_single_comp_chunk <zchunk file>\n", stderr);
+        exit(1);
+    }
+    int in = open(argv[1], O_RDONLY);
+    if(in < 0) {
+        perror("Unable to open LICENSE.dict.fodt.zck for reading");
+        exit(1);
+    }
+    zckCtx *zck = zck_create();
+    if(zck == NULL)
+        exit(1);
+    if(!zck_init_read(zck, in))
+        goto zck_error;
+    chunk = zck_get_first_chunk(zck);
+    if(chunk != NULL)
+        chunk = zck_get_next_chunk(chunk);
+    if(chunk == NULL)
+        goto zck_error;
+    chunk_size = zck_get_chunk_comp_size(chunk);
+    if(chunk_size < 0)
+        goto zck_error;
+    /* Request at least one byte so NULL can only mean allocation failure */
+    data = calloc(chunk_size > 0 ? (size_t)chunk_size : 1, 1);
+    if(data == NULL) {
+        perror("Unable to allocate chunk buffer");
+        goto out;
+    }
+    read_size = zck_get_chunk_comp_data(chunk, data, chunk_size);
+    if(read_size < 0)
+        goto zck_error;
+    if(read_size != chunk_size) {
+        printf("chunk size didn't match expected size: %zd != %zd\n",
+               read_size, chunk_size);
+        goto out;
+    }
+    cksum = get_hash(data, chunk_size, ZCK_HASH_SHA256);
+    if(cksum == NULL)
+        goto out;
+    printf("Calculated checksum: (SHA-256)%s\n", cksum);
+    printf("Expected checksum: (SHA-256)%s\n", echecksum);
+    ret = (strcmp(cksum, echecksum) != 0);
+    if(ret)
+        printf("Checksums don't match!\n");
+    goto out;
+zck_error:
+    printf("%s", zck_get_error(zck));
+out:
+    free(data);
+    free(cksum);
+    zck_free(&zck);
+    return ret;
+}