Add size of uncompressed chunk to index
author Stefano Babic <sbabic@denx.de>
Fri, 16 Jul 2021 07:40:35 +0000 (09:40 +0200)
committer Stefano Babic <sbabic@denx.de>
Mon, 30 Aug 2021 09:51:05 +0000 (11:51 +0200)
The digest of each uncompressed chunk is stored in the index, so it can be
compared against the original data to detect whether a chunk is required
without first having to convert that data to a zck file.

Signed-off-by: Stefano Babic <sbabic@denx.de>
12 files changed:
include/zck.h.in
src/lib/comp/comp.c
src/lib/dl/range.c
src/lib/hash/hash.c
src/lib/header.c
src/lib/index/index_common.c
src/lib/index/index_create.c
src/lib/index/index_read.c
src/lib/zck.c
src/lib/zck_private.h
src/zck.c
src/zck_read_header.c

index b847576ad364b80d5f1ed28758a2e62be97f6c46..a053b303c43a15d7b291362dd31bde6ee20b4db7 100644 (file)
@@ -26,6 +26,7 @@ typedef enum zck_ioption {
     ZCK_HASH_CHUNK_TYPE,        /* Set chunk hash type using zck_hash */
     ZCK_VAL_HEADER_HASH_TYPE,   /* Set what the header hash type *should* be */
     ZCK_VAL_HEADER_LENGTH,      /* Set what the header length *should* be */
+    ZCK_UNCOMP_HEADER,          /* Header should contain uncompressed size, too */
     ZCK_COMP_TYPE = 100,        /* Set compression type using zck_comp */
     ZCK_MANUAL_CHUNK,           /* Disable auto-chunking */
     ZCK_CHUNK_MIN,              /* Minimum chunk size when manual chunking */
@@ -263,6 +264,9 @@ char *zck_get_chunk_digest(zckChunk *item)
 /* Get digest size of chunk hash type */
 ssize_t zck_get_chunk_digest_size(zckCtx *zck)
     __attribute__ ((warn_unused_result));
+/* Get uncompressed chunk digest */
+char *zck_get_chunk_digest_uncompressed(zckChunk *item)
+    __attribute__ ((warn_unused_result));
 /* Get chunk data */
 ssize_t zck_get_chunk_data(zckChunk *idx, char *dst, size_t dst_size)
     __attribute__ ((warn_unused_result));
index 89b3301265f5dc42c65d4e362e2b4404c748a600..dbbbefe20b1e5c926401cc64d5886efc2304d5af 100644 (file)
@@ -158,6 +158,8 @@ static ssize_t comp_write(zckCtx *zck, const char *src, const size_t src_size) {
         free(dst);
         return -1;
     }
+    if(zck->has_uncompressed_source && !hash_update(zck, &(zck->work_index_hash_uncomp), src, src_size))
+        return -1;
     free(dst);
     return src_size;
 }
index e102fa9c40bbbacc6972ab318bb46587fb2d029b..a366f9bde697850d83dad082ba751562bf7a4b06 100644 (file)
@@ -54,7 +54,7 @@ static zckRangeItem *range_insert_new(zckCtx *zck, zckRangeItem *prev,
     }
     if(add_index)
         if(!index_new_chunk(zck, &(info->index), idx->digest, idx->digest_size,
-                            end-start+1, end-start+1, idx, false)) {
+                            idx->digest_uncompressed, end-start+1, end-start+1, idx, false)) {
             free(new);
             return NULL;
         }
index d2b00417dcbeabb0c44758b18a0975a215665ba2..25768ae2a299b3571ccad5a6f81e2b171e10d58b 100644 (file)
@@ -517,6 +517,16 @@ char PUBLIC *zck_get_chunk_digest(zckChunk *item) {
     return get_digest_string(item->digest, item->digest_size);
 }
 
+/* Return get_digest_string() of the chunk's uncompressed digest, or NULL if
+ * item is NULL or its zckCtx has has_uncompressed_source unset */
+char PUBLIC *zck_get_chunk_digest_uncompressed(zckChunk *item) {
+    if(item == NULL || !item->zck->has_uncompressed_source)
+        return NULL;
+
+    return get_digest_string(item->digest_uncompressed,
+                             item->digest_size_uncompressed);
+}
+
 /* Returns 1 if all chunks are valid, -1 if even one isn't and 0 if error */
 int PUBLIC zck_find_valid_chunks(zckCtx *zck) {
     VALIDATE_READ_BOOL(zck);
index 38b587b28adeeba19dc20847e978e2cbb49b41a3..0d276f8e83313a2c2660b538de231d844d8e6780 100644 (file)
@@ -44,6 +44,10 @@ static bool check_flags(zckCtx *zck, size_t flags) {
     zck->has_optional_elems = flags & 2;
     if(zck->has_optional_elems)
         flags -= 2;
+    zck->has_uncompressed_source = flags & 4;
+    if(zck->has_uncompressed_source)
+        flags -= 4;
+
     flags = flags & (SIZE_MAX - 1);
     if(flags != 0) {
         set_fatal_error(zck, "Unknown flags(s) set");
@@ -177,13 +181,13 @@ static bool read_index(zckCtx *zck) {
     }
 
     char *header = NULL;
-    zck_log(ZCK_LOG_DEBUG, "Reading index");
     if(zck->lead_size + zck->preface_size + zck->index_size >
        zck->header_size) {
         set_fatal_error(zck, "Read past end of header");
         return false;
     }
     header = zck->header + zck->lead_size + zck->preface_size;
+    zck_log(ZCK_LOG_DEBUG, "Reading index at 0x%x", (unsigned long)(zck->lead_size + zck->preface_size));
     int max_length = zck->header_size - (zck->lead_size + zck->preface_size);
     if(!index_read(zck, header, zck->index_size, max_length))
         return false;
@@ -244,6 +248,8 @@ static bool preface_create(zckCtx *zck) {
     size_t flags = 0;
     if(zck->has_streams)
         flags &= 1;
+    if(zck->has_uncompressed_source)
+        flags |= 4;
     compint_from_size(header+length, flags, &length);
 
     /* Write out compression type and index size */
index 3456d2612544bdbca434c6a186eaafe87b857868..b20b713e999494fc54d4a2c6f21aab1e42dcd45f 100644 (file)
@@ -89,6 +89,7 @@ void clear_work_index(zckCtx *zck) {
         return;
 
     hash_close(&(zck->work_index_hash));
+    hash_close(&(zck->work_index_hash_uncomp));
     if(zck->work_index_item)
         index_free_item(&(zck->work_index_item));
 }
index 8cbc316ade79c23aee12c08cec7226fd83292ca5..a035c843268d0ce98a3ba587b00adb57aa03d713 100644 (file)
@@ -37,22 +37,28 @@ static bool create_chunk(zckCtx *zck) {
 
     clear_work_index(zck);
     zck->work_index_item = zmalloc(sizeof(zckChunk));
-    if(!hash_init(zck, &(zck->work_index_hash), &(zck->chunk_hash_type)))
+    if(!hash_init(zck, &(zck->work_index_hash), &(zck->chunk_hash_type)) ||
+      (!hash_init(zck, &(zck->work_index_hash_uncomp), &(zck->chunk_hash_type))))
         return false;
     return true;
 }
 
 static bool finish_chunk(zckIndex *index, zckChunk *item, char *digest,
-                        bool valid, zckCtx *zck) {
+                        char *digest_uncompressed, bool valid, zckCtx *zck) {
     VALIDATE_BOOL(zck);
     ALLOCD_BOOL(zck, index);
     ALLOCD_BOOL(zck, item);
 
     item->digest = zmalloc(index->digest_size);
+    item->digest_uncompressed = zmalloc(index->digest_size);
     if(digest) {
         memcpy(item->digest, digest, index->digest_size);
         item->digest_size = index->digest_size;
     }
+    if(digest_uncompressed) {
+        memcpy(item->digest_uncompressed, digest_uncompressed, index->digest_size);
+        item->digest_size_uncompressed = index->digest_size;
+    }
     item->start = index->length;
     item->valid = valid;
     item->zck = zck;
@@ -65,6 +71,15 @@ static bool finish_chunk(zckIndex *index, zckChunk *item, char *digest,
     index->last = item;
     index->count += 1;
     index->length += item->comp_length;
+
+    char *s = get_digest_string(digest, index->digest_size);
+    if (zck->has_uncompressed_source) {
+        char *s1 = get_digest_string(digest_uncompressed, index->digest_size);
+        zck_log(ZCK_LOG_DEBUG, "Index %d digest %s digest uncomp %s", index->count, s, s1);
+        free(s1);
+    } else
+        zck_log(ZCK_LOG_DEBUG, "Index %d digest %s", index->count, s);
+    free(s);
     return true;
 }
 
@@ -88,7 +103,8 @@ bool index_create(zckCtx *zck) {
     if(zck->index.first) {
         zckChunk *tmp = zck->index.first;
         while(tmp) {
-            index_malloc += zck->index.digest_size + MAX_COMP_SIZE*2;
+            index_malloc += (zck->has_uncompressed_source + 1) * zck->index.digest_size +
+                   MAX_COMP_SIZE * 2;
             tmp = tmp->next;
         }
     }
@@ -103,6 +119,11 @@ bool index_create(zckCtx *zck) {
             /* Write digest */
             memcpy(index+index_size, tmp->digest, zck->index.digest_size);
             index_size += zck->index.digest_size;
+           /* Write digest for uncompressed if any */
+           if (zck->has_uncompressed_source) {
+                memcpy(index+index_size, tmp->digest_uncompressed, zck->index.digest_size);
+                index_size += zck->index.digest_size;
+           }
             /* Write compressed size */
             compint_from_size(index+index_size, tmp->comp_length,
                                   &index_size);
@@ -121,7 +142,7 @@ bool index_create(zckCtx *zck) {
 }
 
 bool index_new_chunk(zckCtx *zck, zckIndex *index, char *digest,
-                     int digest_size, size_t comp_size, size_t orig_size,
+                     int digest_size, char *digest_uncompressed, size_t comp_size, size_t orig_size,
                      zckChunk *src, bool finished) {
     VALIDATE_BOOL(zck);
 
@@ -138,7 +159,7 @@ bool index_new_chunk(zckCtx *zck, zckIndex *index, char *digest,
     chk->comp_length = comp_size;
     chk->length = orig_size;
     chk->src = src;
-    return finish_chunk(index, chk, digest, finished, zck);
+    return finish_chunk(index, chk, digest, digest_uncompressed, finished, zck);
 }
 
 bool index_add_to_chunk(zckCtx *zck, char *data, size_t comp_size,
@@ -168,6 +189,7 @@ bool index_finish_chunk(zckCtx *zck) {
         return false;
 
     char *digest = NULL;
+    char *digest_uncompressed = NULL;
     if(zck->work_index_item->length > 0) {
         /* Finalize chunk checksum */
         digest = hash_finalize(zck, &(zck->work_index_hash));
@@ -177,16 +199,27 @@ bool index_finish_chunk(zckCtx *zck) {
                             zck_hash_name_from_type(zck->index.hash_type));
             return false;
         }
+        digest_uncompressed = hash_finalize(zck, &(zck->work_index_hash_uncomp));
+        if(digest_uncompressed == NULL) {
+            set_fatal_error(zck, "Unable to calculate %s checksum for new chunk",
+                            zck_hash_name_from_type(zck->index.hash_type));
+            free(digest);
+            return false;
+        }
     } else {
         digest = zmalloc(zck->chunk_hash_type.digest_size);
+        digest_uncompressed = zmalloc(zck->chunk_hash_type.digest_size);
     }
-    if(!finish_chunk(&(zck->index), zck->work_index_item, digest, true, zck)) {
+    if(!finish_chunk(&(zck->index), zck->work_index_item, digest, digest_uncompressed, true, zck)) {
         free(digest);
+        free(digest_uncompressed);
         return false;
     }
 
     free(digest);
+    free(digest_uncompressed);
     zck->work_index_item = NULL;
     hash_close(&(zck->work_index_hash));
+    hash_close(&(zck->work_index_hash_uncomp));
     return true;
 }
index 875b569be109d5d4f581548cb1fa846d116ec2f2..42b4efecdd1f7e6727302fe8797a147c861cc68a 100644 (file)
@@ -85,6 +85,14 @@ bool index_read(zckCtx *zck, char *data, size_t size, size_t max_length) {
                             new);
         length += zck->index.digest_size;
 
+        /* Read uncompressed entry digest, if any */
+        if (zck->has_uncompressed_source) {
+            /* same size for digest as compressed */
+            new->digest_uncompressed = zmalloc(zck->index.digest_size);
+            memcpy(new->digest_uncompressed, data+length, zck->index.digest_size);
+            new->digest_size_uncompressed = zck->index.digest_size;
+            length += zck->index.digest_size;
+       }
         /* Read and store entry length */
         size_t chunk_length = 0;
         if(!compint_to_size(zck, &chunk_length, data+length, &length,
index dece24b3282837e7f0a23bfe2b1e4e7ef7cb7d7e..d563350fa4f3f8e49e2cf7254cfe06bab7470129 100644 (file)
@@ -292,6 +292,8 @@ bool PUBLIC zck_set_ioption(zckCtx *zck, zck_ioption option, ssize_t value) {
         }
         zck->prep_hdr_size = value;
 
+    } else if(option == ZCK_UNCOMP_HEADER) {
+        zck->has_uncompressed_source = 1;
     /* Hash options */
     } else if(option < 100) {
         /* Currently no hash options other than setting hash type, so bail */
index a21e963b1b91731e7626888846f9b64506cdf3b5..49eb4c3e97fd27a9a6e913d9f051c451703aa7e4 100644 (file)
@@ -150,6 +150,8 @@ struct zckDL {
 struct zckChunk {
     char *digest;
     int digest_size;
+    char *digest_uncompressed;
+    int digest_size_uncompressed;
     int valid;
     size_t number;
     size_t start;
@@ -262,9 +264,12 @@ struct zckCtx {
     zckIndex index;
     zckChunk *work_index_item;
     zckHash work_index_hash;
+    zckChunk *work_index_item_uncomp;
+    zckHash work_index_hash_uncomp;
     size_t stream;
     int has_streams;
     int has_optional_elems;
+    int has_uncompressed_source;
 
     char *read_buf;
     size_t read_buf_size;
@@ -340,7 +345,7 @@ bool index_read(zckCtx *zck, char *data, size_t size, size_t max_length)
 bool index_create(zckCtx *zck)
     __attribute__ ((warn_unused_result));
 bool index_new_chunk(zckCtx *zck, zckIndex *index, char *digest, int digest_size,
-                     size_t comp_size, size_t orig_size, zckChunk *src, bool valid)
+                     char* digest_uncompressed, size_t comp_size, size_t orig_size, zckChunk *src, bool valid)
     __attribute__ ((warn_unused_result));
 bool index_add_to_chunk(zckCtx *zck, char *data, size_t comp_size,
                         size_t orig_size)
index 854591cfdeb68dd49a747d151a631458f5e8318a..9d8f01e79d07aa979125d6b51131268adf17c3c4 100644 (file)
--- a/src/zck.c
+++ b/src/zck.c
@@ -57,6 +57,8 @@ static struct argp_option options[] = {
      "Set zstd compression dictionary to FILE"},
     {"manual-chunk", 'm', 0,        0,
      "Don't do any automatic chunking (implies -s)"},
+    {"uncompressed", 'u', 0,        0,
+     "Add extension in header for uncompressed data"},
     {"version",      'V', 0,        0, "Show program version"},
     { 0 }
 };
@@ -69,6 +71,7 @@ struct arguments {
   char *output;
   char *dict;
   bool exit;
+  bool uncompressed;
 };
 
 static error_t parse_opt (int key, char *arg, struct argp_state *state) {
@@ -95,6 +98,9 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state) {
         case 'D':
             arguments->dict = arg;
             break;
+        case 'u':
+            arguments->uncompressed = true;
+            break;
         case 'V':
             version();
             arguments->exit = true;
@@ -223,6 +229,12 @@ int main (int argc, char *argv[]) {
         }
     }
 
+    if(arguments.uncompressed) {
+        if(!zck_set_ioption(zck, ZCK_UNCOMP_HEADER, 1)) {
+            dprintf(STDERR_FILENO, "%s\n", zck_get_error(zck));
+            exit(1);
+        }
+    }
     char *data;
     int in_fd = open(arguments.args[0], O_RDONLY);
     off_t in_size = 0;
index a159ea5c2bccf012893bea9a18804e5dd54047b2..21e9bee6f11d0883f91a68b3fcee063d01ea2bd7 100644 (file)
@@ -176,9 +176,14 @@ int main (int argc, char *argv[]) {
                 dprintf(STDERR_FILENO, "%s", zck_get_error(zck));
                 exit(1);
             }
-            printf("%12lu %s %12lu %12lu %12lu",
+            char *digest_uncompressed = zck_get_chunk_digest_uncompressed(chk);
+            if (!digest_uncompressed)
+                digest_uncompressed = "";
+
+            printf("%12lu %s %s %12lu %12lu %12lu",
                    (long unsigned)zck_get_chunk_number(chk),
                    digest,
+                   digest_uncompressed,
                    (long unsigned)zck_get_chunk_start(chk),
                    (long unsigned)zck_get_chunk_comp_size(chk),
                    (long unsigned)zck_get_chunk_size(chk));