pull: Error on depth pull with missing head commit
author Dan Nicholson <dbn@endlessos.org>
Mon, 11 Jan 2021 19:40:38 +0000 (12:40 -0700)
committer Dan Nicholson <dbn@endlessos.org>
Tue, 12 Jan 2021 21:19:01 +0000 (14:19 -0700)
When pulling with depth, missing parent commits are ignored. However,
the check was applied to any commit, which means that the pull would
succeed even if the requested commit was missing. This might happen on a
corrupted remote repo or when using ref data from a stale summary.

To fix this, the semantics of the `commit_to_depth` hash table are
changed slightly so that it only ever includes parent commits. This makes it
easy to detect when a parent commit is being referenced (although there
is a minor bug there when multiple refs are being pulled) while keeping
references to commits that need their `commitpartial` files cleaned up.
It also means that the table is only populated on depth pulls, which
saves some memory and processing in the common depth=0 case.

Fixes: #2265
src/libostree/ostree-repo-pull-private.h
src/libostree/ostree-repo-pull.c
tests/test-pull-depth.sh

index d4c3e971a7c6b845db69edde2a526b4885d88d74..59b72e88e4da211e82f67cf472084858a783d496 100644 (file)
@@ -88,7 +88,7 @@ typedef struct {
   GHashTable       *ref_keyring_map; /* Maps OstreeCollectionRef to keyring remote name */
   GPtrArray        *static_delta_superblocks;
   GHashTable       *expected_commit_sizes; /* Maps commit checksum to known size */
-  GHashTable       *commit_to_depth; /* Maps commit checksum maximum depth */
+  GHashTable       *commit_to_depth; /* Maps parent commit checksum maximum depth */
   GHashTable       *scanned_metadata; /* Maps object name to itself */
   GHashTable       *fetched_detached_metadata; /* Map<checksum,GVariant> */
   GHashTable       *requested_metadata; /* Maps object name to itself */
index 7d4b91e2860470b9ca8e7bb4871cf54f5bd50ed8..abbb5a0dd582386282ff49540799a30b93adc980 100644 (file)
@@ -1113,6 +1113,18 @@ on_metadata_written (GObject           *object,
   check_outstanding_requests_handle_error (pull_data, &local_error);
 }
 
+static gboolean
+is_parent_commit (OtPullData *pull_data,
+                  const char *checksum)
+{
+  /* Returns TRUE if checksum is in the commit_to_depth table. Only parent
+   * commits are added to that table, so a checksum that isn't in it means
+   * a new commit chain is being started. FIXME: This will be wrong if the
+   * desired commit was a parent in a previously followed chain.
+   */
+  return g_hash_table_contains (pull_data->commit_to_depth, checksum);
+}
+
 static void
 meta_fetch_on_complete (GObject           *object,
                         GAsyncResult      *result,
@@ -1158,7 +1170,8 @@ meta_fetch_on_complete (GObject           *object,
            * We may be pulling from a partial repository that ends in
            * a dangling parent reference. */
           else if (objtype == OSTREE_OBJECT_TYPE_COMMIT &&
-                   pull_data->maxdepth != 0)
+                   pull_data->maxdepth != 0 &&
+                   is_parent_commit (pull_data, checksum))
             {
               g_clear_error (&local_error);
               /* If the remote repo supports tombstone commits, check if the commit was intentionally
@@ -1542,8 +1555,6 @@ scan_commit_object (OtPullData                 *pull_data,
   else
     {
       depth = pull_data->maxdepth;
-      g_hash_table_insert (pull_data->commit_to_depth, g_strdup (checksum),
-                           GINT_TO_POINTER (depth));
     }
 
 #ifndef OSTREE_DISABLE_GPGME
@@ -1684,40 +1695,19 @@ scan_commit_object (OtPullData                 *pull_data,
         return FALSE;
     }
 
-  if (parent_csum_bytes != NULL && pull_data->maxdepth == -1)
-    {
-      queue_scan_one_metadata_object_c (pull_data, parent_csum_bytes,
-                                        OSTREE_OBJECT_TYPE_COMMIT, NULL,
-                                        recursion_depth + 1, NULL);
-    }
-  else if (parent_csum_bytes != NULL && depth > 0)
+  if (parent_csum_bytes != NULL && (pull_data->maxdepth == -1 || depth > 0))
     {
       char parent_checksum[OSTREE_SHA256_STRING_LEN+1];
-      gpointer parent_depthp;
-      int parent_depth;
-
       ostree_checksum_inplace_from_bytes (parent_csum_bytes, parent_checksum);
 
-      if (g_hash_table_lookup_extended (pull_data->commit_to_depth, parent_checksum,
-                                        NULL, &parent_depthp))
-        {
-          parent_depth = GPOINTER_TO_INT (parent_depthp);
-        }
-      else
-        {
-          parent_depth = depth - 1;
-        }
-
-      if (parent_depth >= 0)
-        {
-          g_hash_table_insert (pull_data->commit_to_depth, g_strdup (parent_checksum),
-                               GINT_TO_POINTER (parent_depth));
-          queue_scan_one_metadata_object_c (pull_data, parent_csum_bytes,
-                                            OSTREE_OBJECT_TYPE_COMMIT,
-                                            NULL,
-                                            recursion_depth + 1,
-                                            NULL);
-        }
+      int parent_depth = (depth > 0) ? depth - 1 : -1;
+      g_hash_table_insert (pull_data->commit_to_depth, g_strdup (parent_checksum),
+                           GINT_TO_POINTER (parent_depth));
+      queue_scan_one_metadata_object_c (pull_data, parent_csum_bytes,
+                                        OSTREE_OBJECT_TYPE_COMMIT,
+                                        NULL,
+                                        recursion_depth + 1,
+                                        NULL);
     }
 
   /* We only recurse to looking whether we need dirtree/dirmeta
index 998a18f552339e46052177400ccb589bceded9c4..8fb2f5973f3ba17b9ffcc7e000571172bf922fc9 100755 (executable)
@@ -25,7 +25,7 @@ set -euo pipefail
 
 setup_fake_remote_repo1 "archive"
 
-echo '1..1'
+echo '1..3'
 
 cd ${test_tmpdir}
 mkdir repo
@@ -63,3 +63,35 @@ find repo/state -name '*.commitpartial' | wc -l > commitpartialcount
 assert_file_has_content commitpartialcount "^0$"
 
 echo "ok pull depth"
+
+# Check that pulling with depth != 0 succeeds with a missing parent
+# commit. Prune the remote to truncate the history.
+cd ${test_tmpdir}
+${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo prune --refs-only --depth=0
+
+rm -rf repo/refs/heads/* repo/refs/remotes/* repo/objects/*/*.commit
+${CMD_PREFIX} ostree --repo=repo pull --depth=1 origin main
+find repo/objects -name '*.commit' | wc -l > commitcount
+assert_file_has_content commitcount "^1$"
+find repo/state -name '*.commitpartial' | wc -l > commitpartialcount
+assert_file_has_content commitpartialcount "^0$"
+
+rm -rf repo/refs/heads/* repo/refs/remotes/* repo/objects/*/*.commit
+${CMD_PREFIX} ostree --repo=repo pull --depth=-1 origin main
+find repo/objects -name '*.commit' | wc -l > commitcount
+assert_file_has_content commitcount "^1$"
+find repo/state -name '*.commitpartial' | wc -l > commitpartialcount
+assert_file_has_content commitpartialcount "^0$"
+
+echo "ok pull depth missing parent"
+
+# Check that it errors if the ref head commit is missing.
+cd ${test_tmpdir}
+rm -f ostree-srv/gnomerepo/objects/*/*.commit
+
+rm -rf repo/refs/heads/* repo/refs/remotes/* repo/objects/*/*.commit
+if ${CMD_PREFIX} ostree --repo=repo pull --depth=-1 origin main; then
+    fatal "Pull with depth -1 succeeded with missing HEAD commit"
+fi
+
+echo "ok pull depth missing HEAD commit"