lib/pull: Change fetcher to return O_TMPFILE
authorColin Walters <walters@verbum.org>
Tue, 3 Oct 2017 01:36:10 +0000 (21:36 -0400)
committerAtomic Bot <atomic-devel@projectatomic.io>
Thu, 5 Oct 2017 14:58:20 +0000 (14:58 +0000)
A lot of the libostree code is honestly too complex for its
own good (this is mostly my fault).  The way we do HTTP writes
is still one of those.  The way the fetcher writes tempfiles,
then reads them back in is definitely one of those.

Now that we've dropped the "partial object" bits in:
https://github.com/ostreedev/ostree/pull/1176 i.e. commit
https://github.com/ostreedev/ostree/commit/0488b4870e80ef575d8b0edf6f2a9e5ad54bf4df
we can simplify things a lot more by having the fetcher
return an `O_TMPFILE` rather than a filename.

For trusted archive mirroring, we need to enable linking
in the tmpfiles directly.

Otherwise for at least content objects they're compressed, so we couldn't link
them in. For metadata, we need to do similar logic to what we have around
`mmap()` to only grab a tmpfile if the size is large enough.

Closes: #1252
Approved by: jlebon

src/libostree/ostree-fetcher-curl.c
src/libostree/ostree-fetcher-soup.c
src/libostree/ostree-fetcher-util.h
src/libostree/ostree-fetcher.h
src/libostree/ostree-repo-pull.c

index dc85d3fe4c1708b7413d5df0d9e179c50e10388f..1f64188258ed2d7ded5908e03f2854fbe4d666d2 100644 (file)
@@ -269,13 +269,13 @@ ensure_tmpfile (FetcherRequest *req, GError **error)
 {
   if (!req->tmpf.initialized)
     {
-      if (!glnx_open_tmpfile_linkable_at (req->fetcher->tmpdir_dfd, ".",
-                                          O_WRONLY | O_CLOEXEC, &req->tmpf,
-                                          error))
+      if (!_ostree_fetcher_tmpf_from_flags (req->flags, req->fetcher->tmpdir_dfd,
+                                            &req->tmpf, error))
         return FALSE;
     }
   return TRUE;
 }
+
 /* Check for completed transfers, and remove their easy handles */
 static void
 check_multi_info (OstreeFetcher *fetcher)
@@ -378,25 +378,19 @@ check_multi_info (OstreeFetcher *fetcher)
               g_autoptr(GError) local_error = NULL;
               GError **error = &local_error;
 
-              g_autofree char *tmpfile_path =
-                ostree_fetcher_generate_url_tmpname (eff_url);
               if (!ensure_tmpfile (req, error))
                 {
                   g_task_return_error (task, g_steal_pointer (&local_error));
                 }
-              /* This should match the libsoup chmod */
-              else if (fchmod (req->tmpf.fd, 0644) < 0)
+              else if (lseek (req->tmpf.fd, 0, SEEK_SET) < 0)
                 {
                   glnx_set_error_from_errno (error);
                   g_task_return_error (task, g_steal_pointer (&local_error));
                 }
-              else if (!glnx_link_tmpfile_at (&req->tmpf, GLNX_LINK_TMPFILE_REPLACE,
-                                              fetcher->tmpdir_dfd, tmpfile_path,
-                                              error))
-                g_task_return_error (task, g_steal_pointer (&local_error));
               else
                 {
-                  g_task_return_pointer (task, g_steal_pointer (&tmpfile_path), g_free);
+                  /* We return the tmpfile in the _finish wrapper */
+                  g_task_return_boolean (task, TRUE);
                 }
             }
         }
@@ -887,26 +881,21 @@ _ostree_fetcher_request_to_tmpfile (OstreeFetcher         *self,
 gboolean
 _ostree_fetcher_request_to_tmpfile_finish (OstreeFetcher *self,
                                            GAsyncResult  *result,
-                                           char         **out_filename,
+                                           GLnxTmpfile   *out_tmpf,
                                            GError       **error)
 {
-  GTask *task;
-  FetcherRequest *req;
-  gpointer ret;
-
   g_return_val_if_fail (g_task_is_valid (result, self), FALSE);
   g_return_val_if_fail (g_async_result_is_tagged (result, _ostree_fetcher_request_async), FALSE);
 
-  task = (GTask*)result;
-  req = g_task_get_task_data (task);
+  GTask *task = (GTask*)result;
+  FetcherRequest *req = g_task_get_task_data (task);
 
-  ret = g_task_propagate_pointer (task, error);
-  if (!ret)
+  if (!g_task_propagate_boolean (task, error))
     return FALSE;
 
   g_assert (!req->is_membuf);
-  g_assert (out_filename);
-  *out_filename = ret;
+  *out_tmpf = req->tmpf;
+  req->tmpf.initialized = FALSE; /* Transfer ownership */
 
   return TRUE;
 }
index 90986c6678c41093eeb1b2d5220910d951f3705d..e023d34e5d495a27f4ca0d9ca9f4e71ccc53e22a 100644 (file)
@@ -905,11 +905,8 @@ on_stream_read (GObject        *object,
     {
       if (!pending->is_membuf)
         {
-          if (!glnx_open_tmpfile_linkable_at (pending->thread_closure->base_tmpdir_dfd, ".",
-                                              O_WRONLY | O_CLOEXEC, &pending->tmpf, &local_error))
-            goto out;
-          /* This should match the libcurl chmod */
-          if (!glnx_fchmod (pending->tmpf.fd, 0644, &local_error))
+          if (!_ostree_fetcher_tmpf_from_flags (pending->flags, pending->thread_closure->base_tmpdir_dfd,
+                                                &pending->tmpf, &local_error))
             goto out;
           pending->out_stream = g_unix_output_stream_new (pending->tmpf.fd, FALSE);
         }
@@ -943,18 +940,13 @@ on_stream_read (GObject        *object,
         }
       else
         {
-          g_autofree char *uristring =
-            soup_uri_to_string (soup_request_get_uri (pending->request), FALSE);
-          g_autofree char *tmpfile_path =
-            ostree_fetcher_generate_url_tmpname (uristring);
-          if (!glnx_link_tmpfile_at (&pending->tmpf, GLNX_LINK_TMPFILE_REPLACE,
-                                     pending->thread_closure->base_tmpdir_dfd, tmpfile_path,
-                                     &local_error))
-            g_task_return_error (task, g_steal_pointer (&local_error));
+          if (lseek (pending->tmpf.fd, 0, SEEK_SET) < 0)
+            {
+              glnx_set_error_from_errno (&local_error);
+              g_task_return_error (task, g_steal_pointer (&local_error));
+            }
           else
-            g_task_return_pointer (task,
-                                   g_steal_pointer (&tmpfile_path),
-                                   (GDestroyNotify) g_free);
+            g_task_return_boolean (task, TRUE);
         }
       remove_pending (pending);
     }
@@ -1174,7 +1166,7 @@ _ostree_fetcher_request_to_tmpfile (OstreeFetcher         *self,
 gboolean
 _ostree_fetcher_request_to_tmpfile_finish (OstreeFetcher *self,
                                            GAsyncResult  *result,
-                                           char         **out_filename,
+                                           GLnxTmpfile   *out_tmpf,
                                            GError       **error)
 {
   GTask *task;
@@ -1192,8 +1184,8 @@ _ostree_fetcher_request_to_tmpfile_finish (OstreeFetcher *self,
     return FALSE;
 
   g_assert (!pending->is_membuf);
-  g_assert (out_filename);
-  *out_filename = ret;
+  *out_tmpf = pending->tmpf;
+  pending->tmpf.initialized = FALSE; /* Transfer ownership */
 
   return TRUE;
 }
index 5f1dd36d15717c2d04d251cd5e5f71a8b664d7c2..292938168526f9317afbabad8c02588ce23de8c3 100644 (file)
 
 G_BEGIN_DECLS
 
-/* FIXME - delete this and replace by having fetchers simply
- * return O_TMPFILE fds, not file paths.
- */
-static inline char *
-ostree_fetcher_generate_url_tmpname (const char *url)
+static inline gboolean
+_ostree_fetcher_tmpf_from_flags (OstreeFetcherRequestFlags flags,
+                                 int                       dfd,
+                                 GLnxTmpfile              *tmpf,
+                                 GError                  **error)
 {
-  return g_compute_checksum_for_string (G_CHECKSUM_SHA256,
-                                        url, strlen (url));
+  if ((flags & OSTREE_FETCHER_REQUEST_LINKABLE) > 0)
+    {
+      if (!glnx_open_tmpfile_linkable_at (dfd, ".", O_RDWR | O_CLOEXEC, tmpf, error))
+        return FALSE;
+    }
+  else if (!glnx_open_anonymous_tmpfile (O_RDWR | O_CLOEXEC, tmpf, error))
+    return FALSE;
+
+  if (!glnx_fchmod (tmpf->fd, 0644, error))
+    return FALSE;
+  return TRUE;
 }
 
 gboolean _ostree_fetcher_mirrored_request_to_membuf (OstreeFetcher *fetcher,
index 8cdd1e11413d43c54a96e748011f877dd0531242..18aaec4000b48a036e0ed8d3a3e8807ce2f82afd 100644 (file)
@@ -55,7 +55,8 @@ typedef enum {
 
 typedef enum {
   OSTREE_FETCHER_REQUEST_NUL_TERMINATION = (1 << 0),
-  OSTREE_FETCHER_REQUEST_OPTIONAL_CONTENT = (1 << 1)
+  OSTREE_FETCHER_REQUEST_OPTIONAL_CONTENT = (1 << 1),
+  OSTREE_FETCHER_REQUEST_LINKABLE = (1 << 2),
 } OstreeFetcherRequestFlags;
 
 void
@@ -124,7 +125,7 @@ void _ostree_fetcher_request_to_tmpfile (OstreeFetcher         *self,
 
 gboolean _ostree_fetcher_request_to_tmpfile_finish (OstreeFetcher *self,
                                                     GAsyncResult  *result,
-                                                    char         **out_filename,
+                                                    GLnxTmpfile   *out_tmpf,
                                                     GError       **error);
 
 void _ostree_fetcher_request_to_membuf (OstreeFetcher         *self,
index 57bb19852f703da45e6420ea1cca22facf84a10a..9a8c0ebf785d11ebd602c1793791502e2df92a6d 100644 (file)
@@ -142,6 +142,7 @@ typedef struct {
   guint64           start_time;
 
   gboolean          is_mirror;
+  gboolean          trusted_http_direct;
   gboolean          is_commit_only;
   OstreeRepoImportFlags importflags;
 
@@ -1018,17 +1019,18 @@ content_fetch_on_complete (GObject        *object,
   GError **error = &local_error;
   GCancellable *cancellable = NULL;
   guint64 length;
+  g_auto(GLnxTmpfile) tmpf = { 0, };
+  g_autoptr(GInputStream) tmpf_input = NULL;
   g_autoptr(GFileInfo) file_info = NULL;
   g_autoptr(GVariant) xattrs = NULL;
   g_autoptr(GInputStream) file_in = NULL;
   g_autoptr(GInputStream) object_input = NULL;
-  g_auto(OtCleanupUnlinkat) tmp_unlinker = { _ostree_fetcher_get_dfd (fetcher), NULL };
   const char *checksum;
   g_autofree char *checksum_obj = NULL;
   OstreeObjectType objtype;
   gboolean free_fetch_data = TRUE;
 
-  if (!_ostree_fetcher_request_to_tmpfile_finish (fetcher, result, &tmp_unlinker.path, error))
+  if (!_ostree_fetcher_request_to_tmpfile_finish (fetcher, result, &tmpf, error))
     goto out;
 
   ostree_object_name_deserialize (fetch_data->object, &checksum, &objtype);
@@ -1040,48 +1042,31 @@ content_fetch_on_complete (GObject        *object,
   const gboolean verifying_bareuseronly =
     (pull_data->importflags & _OSTREE_REPO_IMPORT_FLAGS_VERIFY_BAREUSERONLY) > 0;
 
-  /* If we're mirroring and writing into an archive repo, and both checksum and
-   * bareuseronly are turned off, we can directly copy the content rather than
-   * paying the cost of exploding it, checksumming, and re-gzip.
+  /* See comments where we set this variable; this is implementing
+   * the --trusted-http/OSTREE_REPO_PULL_FLAGS_TRUSTED_HTTP flags.
    */
-  const gboolean mirroring_into_archive =
-    pull_data->is_mirror && pull_data->repo->mode == OSTREE_REPO_MODE_ARCHIVE;
-  const gboolean import_trusted = !verifying_bareuseronly &&
-    (pull_data->importflags & _OSTREE_REPO_IMPORT_FLAGS_TRUSTED) > 0;
-  if (mirroring_into_archive && import_trusted)
+  if (pull_data->trusted_http_direct)
     {
-      gboolean have_object;
-      if (!ostree_repo_has_object (pull_data->repo, OSTREE_OBJECT_TYPE_FILE, checksum,
-                                   &have_object,
-                                   cancellable, error))
+      g_assert (!verifying_bareuseronly);
+      if (!_ostree_repo_commit_tmpf_final (pull_data->repo, checksum, objtype,
+                                           &tmpf, cancellable, error))
         goto out;
-
-      if (!have_object)
-        {
-          if (!_ostree_repo_commit_path_final (pull_data->repo, checksum, objtype,
-                                               &tmp_unlinker,
-                                               cancellable, error))
-            goto out;
-        }
       pull_data->n_fetched_content++;
     }
   else
     {
+      struct stat stbuf;
+      if (!glnx_fstat (tmpf.fd, &stbuf, error))
+        goto out;
       /* Non-mirroring path */
+      tmpf_input = g_unix_input_stream_new (glnx_steal_fd (&tmpf.fd), TRUE);
 
       /* If it appears corrupted, we'll delete it below */
-      if (!ostree_content_file_parse_at (TRUE, _ostree_fetcher_get_dfd (fetcher),
-                                         tmp_unlinker.path, FALSE,
-                                         &file_in, &file_info, &xattrs,
-                                         cancellable, error))
+      if (!ostree_content_stream_parse (TRUE, tmpf_input, stbuf.st_size, FALSE,
+                                        &file_in, &file_info, &xattrs,
+                                        cancellable, error))
         goto out;
 
-      /* Also, delete it now that we've opened it, we'll hold
-       * a reference to the fd.  If we fail to validate or write, then
-       * the temp space will be cleaned up.
-       */
-      ot_cleanup_unlinkat (&tmp_unlinker);
-
       if (verifying_bareuseronly)
         {
           if (!_ostree_validate_bareuseronly_mode_finfo (file_info, checksum, error))
@@ -1161,13 +1146,12 @@ meta_fetch_on_complete (GObject           *object,
   FetchObjectData *fetch_data = user_data;
   OtPullData *pull_data = fetch_data->pull_data;
   g_autoptr(GVariant) metadata = NULL;
-  g_auto(OtCleanupUnlinkat) tmp_unlinker = { _ostree_fetcher_get_dfd (fetcher), NULL };
+  g_auto(GLnxTmpfile) tmpf = { 0, };
   const char *checksum;
   g_autofree char *checksum_obj = NULL;
   OstreeObjectType objtype;
   g_autoptr(GError) local_error = NULL;
   GError **error = &local_error;
-  glnx_fd_close int fd = -1;
   gboolean free_fetch_data = TRUE;
 
   ostree_object_name_deserialize (fetch_data->object, &checksum, &objtype);
@@ -1175,7 +1159,7 @@ meta_fetch_on_complete (GObject           *object,
   g_debug ("fetch of %s%s complete", checksum_obj,
            fetch_data->is_detached_meta ? " (detached)" : "");
 
-  if (!_ostree_fetcher_request_to_tmpfile_finish (fetcher, result, &tmp_unlinker.path, error))
+  if (!_ostree_fetcher_request_to_tmpfile_finish (fetcher, result, &tmpf, error))
     {
       if (g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND))
         {
@@ -1218,17 +1202,9 @@ meta_fetch_on_complete (GObject           *object,
   if (objtype == OSTREE_OBJECT_TYPE_TOMBSTONE_COMMIT)
     goto out;
 
-  if (!glnx_openat_rdonly (_ostree_fetcher_get_dfd (fetcher), tmp_unlinker.path, TRUE, &fd, error))
-    goto out;
-
-  /* Now delete it, keeping the fd open as the last reference; see comment in
-   * corresponding content fetch path.
-   */
-  ot_cleanup_unlinkat (&tmp_unlinker);
-
   if (fetch_data->is_detached_meta)
     {
-      if (!ot_variant_read_fd (fd, 0, G_VARIANT_TYPE ("a{sv}"),
+      if (!ot_variant_read_fd (tmpf.fd, 0, G_VARIANT_TYPE ("a{sv}"),
                                FALSE, &metadata, error))
         goto out;
 
@@ -1245,7 +1221,7 @@ meta_fetch_on_complete (GObject           *object,
     }
   else
     {
-      if (!ot_variant_read_fd (fd, 0, ostree_metadata_variant_type (objtype),
+      if (!ot_variant_read_fd (tmpf.fd, 0, ostree_metadata_variant_type (objtype),
                                FALSE, &metadata, error))
         goto out;
 
@@ -1314,27 +1290,20 @@ static_deltapart_fetch_on_complete (GObject           *object,
   OstreeFetcher *fetcher = (OstreeFetcher *)object;
   FetchStaticDeltaData *fetch_data = user_data;
   OtPullData *pull_data = fetch_data->pull_data;
-  g_autofree char *temp_path = NULL;
+  g_auto(GLnxTmpfile) tmpf = { 0, };
   g_autoptr(GInputStream) in = NULL;
   g_autoptr(GVariant) part = NULL;
   g_autoptr(GError) local_error = NULL;
   GError **error = &local_error;
-  glnx_fd_close int fd = -1;
   gboolean free_fetch_data = TRUE;
 
   g_debug ("fetch static delta part %s complete", fetch_data->expected_checksum);
 
-  if (!_ostree_fetcher_request_to_tmpfile_finish (fetcher, result, &temp_path, error))
+  if (!_ostree_fetcher_request_to_tmpfile_finish (fetcher, result, &tmpf, error))
     goto out;
 
-  if (!glnx_openat_rdonly (_ostree_fetcher_get_dfd (fetcher), temp_path, TRUE, &fd, error))
-    goto out;
-
-  /* From here on, if we fail to apply the delta, we'll re-fetch it */
-  if (!glnx_unlinkat (_ostree_fetcher_get_dfd (fetcher), temp_path, 0, error))
-    goto out;
-
-  in = g_unix_input_stream_new (fd, FALSE);
+  /* Transfer ownership of the fd */
+  in = g_unix_input_stream_new (glnx_steal_fd (&tmpf.fd), TRUE);
 
   /* TODO - make async */
   if (!_ostree_static_delta_part_open (in, NULL, 0, fetch_data->expected_checksum,
@@ -1979,6 +1948,8 @@ start_fetch (OtPullData *pull_data,
   else
     expected_max_size = 0;
 
+  if (!is_meta && pull_data->trusted_http_direct)
+    flags |= OSTREE_FETCHER_REQUEST_LINKABLE;
   _ostree_fetcher_request_to_tmpfile (pull_data->fetcher, mirrorlist,
                                       obj_subpath, flags, expected_max_size,
                                       is_meta ? OSTREE_REPO_PULL_METADATA_PRIORITY
@@ -3587,6 +3558,11 @@ ostree_repo_pull_with_options (OstreeRepo             *self,
        */
       if ((flags & OSTREE_REPO_PULL_FLAGS_UNTRUSTED) == 0)
         pull_data->importflags |= _OSTREE_REPO_IMPORT_FLAGS_TRUSTED;
+
+      /* Shouldn't be referenced in this path, but just in case.  See below
+       * for more information.
+       */
+      pull_data->trusted_http_direct = FALSE;
     }
   else
     {
@@ -3597,6 +3573,18 @@ ostree_repo_pull_with_options (OstreeRepo             *self,
        */
       if (flags & OSTREE_REPO_PULL_FLAGS_TRUSTED_HTTP)
         pull_data->importflags |= _OSTREE_REPO_IMPORT_FLAGS_TRUSTED;
+
+      const gboolean verifying_bareuseronly =
+        (pull_data->importflags & _OSTREE_REPO_IMPORT_FLAGS_VERIFY_BAREUSERONLY) > 0;
+      /* If we're mirroring and writing into an archive repo, and both checksum and
+       * bareuseronly are turned off, we can directly copy the content rather than
+       * paying the cost of exploding it, checksumming, and re-gzip.
+       */
+      const gboolean mirroring_into_archive =
+        pull_data->is_mirror && pull_data->repo->mode == OSTREE_REPO_MODE_ARCHIVE;
+      const gboolean import_trusted = !verifying_bareuseronly &&
+        (pull_data->importflags & _OSTREE_REPO_IMPORT_FLAGS_TRUSTED) > 0;
+      pull_data->trusted_http_direct = mirroring_into_archive && import_trusted;
     }
 
   /* We can't use static deltas if pulling into an archive repo. */