When mirroring, write content directly, do not verify
author Colin Walters <walters@verbum.org>
Sun, 1 Feb 2015 16:09:47 +0000 (11:09 -0500)
committer Colin Walters <walters@verbum.org>
Fri, 6 Feb 2015 02:24:21 +0000 (21:24 -0500)
When doing a pull --mirror from an archive-z2 repository into another
archive-z2 repository, currently we gunzip/checksum/gzip each content
object.  The re-gzip process in particular is fairly expensive.

This does assume that the upstream content is trusted and correct.
It'd be nice in the future to do at least a CRC check, if not the full
checksum.  (Could we append CRC data to the end of filez objects?)

We could also choose to only do this optimization if fetching over
TLS.

before: 1626 metadata, 20320 content objects fetched; 299634 KiB transferred in 62 seconds
after : 1626 metadata, 20320 content objects fetched; 299634 KiB transferred in 11 seconds

src/libostree/ostree-repo-commit.c
src/libostree/ostree-repo-private.h
src/libostree/ostree-repo-pull.c
tests/test-pull-mirror-summary.sh

index 054ed6590158f2d32df072323c830a585f24d71c..b3f3cc3fc1d03c122cca80f9016c570eb73337ba 100644 (file)
@@ -123,12 +123,62 @@ write_file_metadata_to_xattr (int fd,
   return TRUE;
 }
 
+/* Rename temp_filename (relative to temp_dfd) to the loose-object path for
+ * (checksum, objtype): relative to self->tmp_dir_fd while a transaction is
+ * in progress, otherwise under self->objects_dir_fd.  EEXIST from
+ * renameat() is not an error; the temporary file is unlinked instead.
+ * Returns TRUE on success, or FALSE with @error set.
+ */
+gboolean
+_ostree_repo_commit_loose_final (OstreeRepo        *self,
+                                 const char        *checksum,
+                                 OstreeObjectType   objtype,
+                                 int                temp_dfd,
+                                 const char        *temp_filename,
+                                 GCancellable      *cancellable,
+                                 GError           **error)
+{
+  gboolean ret = FALSE;
+  int dest_dfd;
+  char tmpbuf[_OSTREE_LOOSE_PATH_MAX];
+
+  if (self->in_transaction)
+    {
+      _ostree_repo_get_tmpobject_path (self, tmpbuf, checksum, objtype);
+      dest_dfd = self->tmp_dir_fd;
+    }
+  else
+    {
+      /* Use the caller's objtype; hard-coding FILE misplaces other types. */
+      _ostree_loose_path (tmpbuf, checksum, objtype, self->mode);
+      if (!_ostree_repo_ensure_loose_objdir_at (self->objects_dir_fd, tmpbuf,
+                                                cancellable, error))
+        goto out;
+      dest_dfd = self->objects_dir_fd;
+    }
+
+  if (G_UNLIKELY (renameat (temp_dfd, temp_filename,
+                            dest_dfd, tmpbuf) == -1))
+    {
+      if (errno != EEXIST)
+        {
+          gs_set_error_from_errno (error, errno);
+          g_prefix_error (error, "Storing file '%s': ", temp_filename);
+          goto out;
+        }
+      else
+        (void) unlinkat (temp_dfd, temp_filename, 0);
+    }
+
+  ret = TRUE;
+ out:
+  return ret;
+}
 
 static gboolean
 commit_loose_object_trusted (OstreeRepo        *self,
                              const char        *checksum,
                              OstreeObjectType   objtype,
-                             const char        *loose_path,
                              const char        *temp_filename,
                              gboolean           object_is_symlink,
                              guint32            uid,
@@ -268,43 +318,12 @@ commit_loose_object_trusted (OstreeRepo        *self,
             }
         }
     }
-  
-  if (!_ostree_repo_ensure_loose_objdir_at (self->objects_dir_fd, loose_path,
-                                            cancellable, error))
-    goto out;
 
-  {
-    gs_free gchar *tmp_dest = NULL;
-    int dir;
-    const char *dest;
-
-    if (self->in_transaction)
-      {
-        char tmpbuf[_OSTREE_LOOSE_PATH_MAX];
-        _ostree_repo_get_tmpobject_path (self, tmpbuf, checksum, objtype);
-        tmp_dest = g_strdup (tmpbuf);
-        dir = self->tmp_dir_fd;
-        dest = tmp_dest;
-      }
-    else
-      {
-        dir = self->objects_dir_fd;
-        dest = loose_path;
-      }
-
-    if (G_UNLIKELY (renameat (self->tmp_dir_fd, temp_filename,
-                              dir, dest) == -1))
-      {
-        if (errno != EEXIST)
-          {
-            gs_set_error_from_errno (error, errno);
-            g_prefix_error (error, "Storing file '%s': ", temp_filename);
-            goto out;
-          }
-        else
-          (void) unlinkat (self->tmp_dir_fd, temp_filename, 0);
-      }
-  }
+  if (!_ostree_repo_commit_loose_final (self, checksum, objtype,
+                                        self->tmp_dir_fd, temp_filename,
+                                        cancellable, error))
+    goto out;
+  
   ret = TRUE;
  out:
   return ret;
@@ -515,14 +534,10 @@ _ostree_repo_commit_trusted_content_bare (OstreeRepo          *self,
                                           GError             **error)
 {
   gboolean ret = FALSE;
-  char loose_objpath[_OSTREE_LOOSE_PATH_MAX];
 
   if (state->fd != -1)
     {
-      _ostree_loose_path (loose_objpath, checksum, OSTREE_OBJECT_TYPE_FILE, self->mode);
-      
       if (!commit_loose_object_trusted (self, checksum, OSTREE_OBJECT_TYPE_FILE,
-                                        loose_objpath,
                                         state->temp_filename,
                                         FALSE, uid, gid, mode,
                                         xattrs, state->fd,
@@ -778,7 +793,6 @@ write_object (OstreeRepo         *self,
         fd = g_file_descriptor_based_get_fd ((GFileDescriptorBased*)temp_out);
       
       if (!commit_loose_object_trusted (self, actual_checksum, objtype,
-                                        loose_objpath,
                                         temp_filename,
                                         object_is_symlink,
                                         uid, gid, mode,
index a19108f1cba7c89ae112f3ec7a644be22c44a687..e55ed60774bfc5e91fede6b0e1a4b0bd0e9a422a 100644 (file)
@@ -194,6 +194,15 @@ _ostree_repo_gpg_verify_file_with_metadata (OstreeRepo          *self,
                                             GCancellable        *cancellable,
                                             GError             **error);
 
+gboolean
+_ostree_repo_commit_loose_final (OstreeRepo        *self,
+                                 const char        *checksum,
+                                 OstreeObjectType   objtype,
+                                 int                temp_dfd,
+                                 const char        *temp_filename,
+                                 GCancellable      *cancellable,
+                                 GError           **error);
+
 typedef struct {
   int fd;
   char *temp_filename;
index e61538af9755d84e9628ae5e3454208971ddcddc..fd19d687edf763ade84f7c3d41fdf30128073472 100644 (file)
@@ -82,6 +82,8 @@ typedef struct {
   int               maxdepth;
   guint64           start_time;
 
+  gboolean          is_mirror;
+
   char         *dir;
   gboolean      commitpartial_exists;
 
@@ -596,32 +598,54 @@ content_fetch_on_complete (GObject        *object,
   g_assert (objtype == OSTREE_OBJECT_TYPE_FILE);
 
   g_debug ("fetch of %s complete", ostree_object_to_string (checksum, objtype));
-  
-  if (!ostree_content_file_parse_at (TRUE, pull_data->tmpdir_dfd, temp_path, FALSE,
-                                     &file_in, &file_info, &xattrs,
-                                     cancellable, error))
+
+  if (pull_data->is_mirror && pull_data->repo->mode == OSTREE_REPO_MODE_ARCHIVE_Z2)
     {
-      /* If it appears corrupted, delete it */
-      (void) unlinkat (pull_data->tmpdir_dfd, temp_path, 0);
-      goto out;
-    }
+      gboolean have_object;
+      if (!ostree_repo_has_object (pull_data->repo, OSTREE_OBJECT_TYPE_FILE, checksum,
+                                   &have_object,
+                                   cancellable, error))
+        goto out;
 
-  /* Also, delete it now that we've opened it, we'll hold
-   * a reference to the fd.  If we fail to write later, then
-   * the temp space will be cleaned up.
-   */
-  (void) unlinkat (pull_data->tmpdir_dfd, temp_path, 0);
+      if (!have_object)
+        {
+          if (!_ostree_repo_commit_loose_final (pull_data->repo, checksum, OSTREE_OBJECT_TYPE_FILE,
+                                                pull_data->tmpdir_dfd, temp_path,
+                                                cancellable, error))
+            goto out;
+        }
+      pull_data->n_fetched_content++;
+    }
+  else
+    {
+      /* Non-mirroring path */
+      
+      if (!ostree_content_file_parse_at (TRUE, pull_data->tmpdir_dfd, temp_path, FALSE,
+                                         &file_in, &file_info, &xattrs,
+                                         cancellable, error))
+        {
+          /* If it appears corrupted, delete it */
+          (void) unlinkat (pull_data->tmpdir_dfd, temp_path, 0);
+          goto out;
+        }
 
-  if (!ostree_raw_file_to_content_stream (file_in, file_info, xattrs,
-                                          &object_input, &length,
-                                          cancellable, error))
-    goto out;
+      /* Also, delete it now that we've opened it, we'll hold
+       * a reference to the fd.  If we fail to write later, then
+       * the temp space will be cleaned up.
+       */
+      (void) unlinkat (pull_data->tmpdir_dfd, temp_path, 0);
+      
+      if (!ostree_raw_file_to_content_stream (file_in, file_info, xattrs,
+                                              &object_input, &length,
+                                              cancellable, error))
+        goto out;
   
-  pull_data->n_outstanding_content_write_requests++;
-  ostree_repo_write_content_async (pull_data->repo, checksum,
-                                   object_input, length,
-                                   cancellable,
-                                   content_fetch_on_write_complete, fetch_data);
+      pull_data->n_outstanding_content_write_requests++;
+      ostree_repo_write_content_async (pull_data->repo, checksum,
+                                       object_input, length,
+                                       cancellable,
+                                       content_fetch_on_write_complete, fetch_data);
+    }
 
  out:
   pull_data->n_outstanding_content_fetches--;
@@ -1589,7 +1613,6 @@ ostree_repo_pull_with_options (OstreeRepo             *self,
   OstreeRepoPullFlags flags = 0;
   const char *dir_to_pull = NULL;
   char **refs_to_fetch = NULL;
-  gboolean is_mirror;
   GSource *update_timeout = NULL;
   GSource *idle_src;
 
@@ -1609,7 +1632,7 @@ ostree_repo_pull_with_options (OstreeRepo             *self,
   if (dir_to_pull)
     g_return_val_if_fail (dir_to_pull[0] == '/', FALSE);
 
-  is_mirror = (flags & OSTREE_REPO_PULL_FLAGS_MIRROR) > 0;
+  pull_data->is_mirror = (flags & OSTREE_REPO_PULL_FLAGS_MIRROR) > 0;
 
   pull_data->async_error = error;
   pull_data->main_context = g_main_context_ref_thread_default ();
@@ -1824,7 +1847,7 @@ ostree_repo_pull_with_options (OstreeRepo             *self,
 
   pull_data->static_delta_superblocks = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
 
-  if (is_mirror && !refs_to_fetch && !configured_branches)
+  if (pull_data->is_mirror && !refs_to_fetch && !configured_branches)
     {
       SoupURI *summary_uri = NULL;
       gs_unref_bytes GBytes *bytes = NULL;
@@ -2034,7 +2057,7 @@ ostree_repo_pull_with_options (OstreeRepo             *self,
         }
       else
         {
-          ostree_repo_transaction_set_ref (pull_data->repo, is_mirror ? NULL : pull_data->remote_name, ref, checksum);
+          ostree_repo_transaction_set_ref (pull_data->repo, pull_data->is_mirror ? NULL : pull_data->remote_name, ref, checksum);
         }
     }
 
index 055df900d8c2c1c06c60a53de16b1531c2aec5f2..a09b152c78db74c5f6b641182d7ac1781da2f11b 100755 (executable)
@@ -38,7 +38,7 @@ ostree --repo=${test_tmpdir}/ostree-srv/gnomerepo summary -u
 
 cd ${test_tmpdir}
 mkdir repo
-ostree --repo=repo init
+ostree --repo=repo init --mode=archive-z2
 ostree --repo=repo remote add --set=gpg-verify=false origin $(cat httpd-address)/ostree/gnomerepo
 ostree --repo=repo pull --mirror origin
 ostree --repo=repo checkout -U main main-copy