lib/commit: Use more direct path for regfile commits
authorColin Walters <walters@verbum.org>
Fri, 8 Dec 2017 18:55:39 +0000 (13:55 -0500)
committerAtomic Bot <atomic-devel@projectatomic.io>
Tue, 12 Dec 2017 14:17:20 +0000 (14:17 +0000)
In the non-`CONSUME` path for regfiles (which happens currently for
`bare-user`), we go to a lot of contortions to make an "object stream",
only to immediately parse it again.

Fixing this will also enable the `G_IS_FILE_DESCRIPTOR_BASED()` fast path in
commit, since the input stream will actually reference the file descriptor and
not be an `_OstreeChainInputStream`.

There's a slight concern here in that we're no longer checksumming *literally*
the object stream passed in for the stream case, but I mention in the comment,
the data should be the same, and if it's not somehow we're not adding risk,
since the checksum is still covering the data we actually care about.

Prep for further changes to break up the `write_content_object()` path into
separate paths for archive, as well as regfile vs symlink in non-archive.

Closes: #1371
Approved by: jlebon

src/libostree/ostree-repo-commit.c
src/libotutil/ot-checksum-instream.c
src/libotutil/ot-checksum-instream.h

index ca384fb8f3c5ce0d7f25d88714c8719128063dc0..50ffeac59377e681707f8677536bb7bf54c306cf 100644 (file)
@@ -597,7 +597,8 @@ static gboolean
 write_content_object (OstreeRepo         *self,
                       const char         *expected_checksum,
                       GInputStream       *input,
-                      guint64             file_object_length,
+                      GFileInfo          *file_info,
+                      GVariant           *xattrs,
                       guchar            **out_csum,
                       GCancellable       *cancellable,
                       GError            **error)
@@ -610,18 +611,30 @@ write_content_object (OstreeRepo         *self,
 
   OstreeRepoMode repo_mode = ostree_repo_get_mode (self);
 
+  GInputStream *file_input; /* Unowned alias */
+  g_autoptr(GInputStream) file_input_owned = NULL; /* We need a temporary for bare-user symlinks */
   glnx_unref_object OtChecksumInstream *checksum_input = NULL;
   if (out_csum)
-    checksum_input = ot_checksum_instream_new (input, G_CHECKSUM_SHA256);
-
-  g_autoptr(GInputStream) file_input = NULL;
-  g_autoptr(GVariant) xattrs = NULL;
-  g_autoptr(GFileInfo) file_info = NULL;
-  if (!ostree_content_stream_parse (FALSE, checksum_input ? (GInputStream*)checksum_input : input,
-                                    file_object_length, FALSE,
-                                    &file_input, &file_info, &xattrs,
-                                    cancellable, error))
-    return FALSE;
+    {
+      /* Previously we checksummed the input verbatim; now
+       * ostree_repo_write_content() parses without checksumming, then we
+       * re-synthesize a header here. The data should be identical; if somehow
+       * it's not that's not a serious problem because we're still computing a
+       * checksum over the data we actually use.
+       */
+      g_autoptr(GBytes) header = _ostree_file_header_new (file_info, xattrs);
+      size_t len;
+      const guint8 *buf = g_bytes_get_data (header, &len);
+      /* Give a null input if there's no content */
+      g_autoptr(GInputStream) null_input = NULL;
+      if (!input)
+        null_input = input = g_memory_input_stream_new_from_data ("", 0, NULL);
+      checksum_input = ot_checksum_instream_new_with_start (input, G_CHECKSUM_SHA256,
+                                                            buf, len);
+      file_input = (GInputStream*)checksum_input;
+    }
+  else
+    file_input = input;
 
   gboolean phys_object_is_symlink = FALSE;
   const GFileType object_file_type = g_file_info_get_file_type (file_info);
@@ -645,10 +658,8 @@ write_content_object (OstreeRepo         *self,
       const char *target_str = g_file_info_get_symlink_target (file_info);
       g_autoptr(GBytes) target = g_bytes_new (target_str, strlen (target_str) + 1);
 
-      if (file_input != NULL)
-        g_object_unref (file_input);
       /* Include the terminating zero so we can e.g. mmap this file */
-      file_input = g_memory_input_stream_new_from_bytes (target);
+      file_input = file_input_owned = g_memory_input_stream_new_from_bytes (target);
       size = g_bytes_get_size (target);
     }
   else if (!phys_object_is_symlink)
@@ -851,7 +862,7 @@ write_content_object (OstreeRepo         *self,
   /* Update statistics */
   g_mutex_lock (&self->txn_lock);
   self->txn.stats.content_objects_written++;
-  self->txn.stats.content_bytes_written += file_object_length;
+  self->txn.stats.content_bytes_written += g_file_info_get_size (file_info);
   self->txn.stats.content_objects_total++;
   g_mutex_unlock (&self->txn_lock);
 
@@ -2246,8 +2257,17 @@ ostree_repo_write_content (OstreeRepo       *self,
         }
     }
 
+  /* Parse the stream */
+  g_autoptr(GInputStream) file_input = NULL;
+  g_autoptr(GVariant) xattrs = NULL;
+  g_autoptr(GFileInfo) file_info = NULL;
+  if (!ostree_content_stream_parse (FALSE, object_input, length, FALSE,
+                                    &file_input, &file_info, &xattrs,
+                                    cancellable, error))
+    return FALSE;
+
   return write_content_object (self, expected_checksum,
-                               object_input, length, out_csum,
+                               file_input, file_info, xattrs, out_csum,
                                cancellable, error);
 }
 
@@ -3152,16 +3172,9 @@ write_content_to_mtree_internal (OstreeRepo                  *self,
             }
         }
 
-      g_autoptr(GInputStream) file_object_input = NULL;
-      guint64 file_obj_length;
-      if (!ostree_raw_file_to_content_stream (file_input,
-                                              modified_info, xattrs,
-                                              &file_object_input, &file_obj_length,
-                                              cancellable, error))
-        return FALSE;
       g_autofree guchar *child_file_csum = NULL;
-      if (!ostree_repo_write_content (self, NULL, file_object_input, file_obj_length,
-                                      &child_file_csum, cancellable, error))
+      if (!write_content_object (self, NULL, file_input, modified_info, xattrs,
+                                 &child_file_csum, cancellable, error))
         return FALSE;
 
       char tmp_checksum[OSTREE_SHA256_STRING_LEN+1];
index 342b14b4f7362ae809da3f7e44cba6a85d81af52..a536d7bb0bda8d5a6480cf7367f7e41e47fc2f1a 100644 (file)
@@ -65,6 +65,16 @@ ot_checksum_instream_init (OtChecksumInstream *self)
 OtChecksumInstream *
 ot_checksum_instream_new (GInputStream    *base,
                           GChecksumType    checksum_type)
+{
+  return ot_checksum_instream_new_with_start (base, checksum_type, NULL, 0);
+}
+
+/* Initialize a checksum stream with starting state from data */
+OtChecksumInstream *
+ot_checksum_instream_new_with_start (GInputStream   *base,
+                                     GChecksumType   checksum_type,
+                                     const guint8   *buf,
+                                     size_t          len)
 {
   OtChecksumInstream *stream;
 
@@ -77,6 +87,8 @@ ot_checksum_instream_new (GInputStream    *base,
   /* For now */
   g_assert (checksum_type == G_CHECKSUM_SHA256);
   ot_checksum_init (&stream->priv->checksum);
+  if (buf)
+    ot_checksum_update (&stream->priv->checksum, buf, len);
 
   return (OtChecksumInstream*) (stream);
 }
index 410047a9e6e449ce4dcb476caa441904f1a623b7..c13c0898c487a2ced358f6ac80a146bf11394062 100644 (file)
@@ -51,6 +51,8 @@ struct _OtChecksumInstreamClass
 GType          ot_checksum_instream_get_type     (void) G_GNUC_CONST;
 
 OtChecksumInstream * ot_checksum_instream_new          (GInputStream   *stream, GChecksumType   checksum);
+OtChecksumInstream * ot_checksum_instream_new_with_start (GInputStream   *stream, GChecksumType   checksum,
+                                                          const guint8 *buf, size_t len);
 
 char * ot_checksum_instream_get_string (OtChecksumInstream *stream);