lib/checkout: Rename disjoint union, change to merge identical files
authorColin Walters <walters@verbum.org>
Fri, 8 Sep 2017 21:07:45 +0000 (17:07 -0400)
committerAtomic Bot <atomic-devel@projectatomic.io>
Wed, 13 Sep 2017 19:19:33 +0000 (19:19 +0000)
It turns out that librpm automatically merges identical files between
distinct packages, and this occurs in practice with Fedora today between
`chkconfig` and `initscripts` for exmaple.

Since we added this for rpm-ostree, we basically want to do what librpm does,
let's change the semantics to do a merge.  While we're here rename
to `UNION_IDENTICAL`.

Closes: #1156
Approved by: jlebon

bash/ostree
man/ostree-checkout.xml
src/libostree/ostree-repo-checkout.c
src/libostree/ostree-repo.h
src/ostree/ot-builtin-checkout.c
tests/basic-test.sh

index 4ebe22c3d2e340edf21508ea23592da58d487236..40326102493f29334a067a100c2416a2e65efad0 100644 (file)
@@ -696,7 +696,7 @@ _ostree_checkout() {
         --require-hardlinks -H
         --union
         --union-add
-        --disjoint-union
+        --union-identical
         --user-mode -U
         --whiteouts
     "
index 07d44b3457fd5d5a69a6c22041bab2dee6f54d07..aba70741a821644610ed98d5b002dc4b9fb670ce 100644 (file)
@@ -98,11 +98,12 @@ Boston, MA 02111-1307, USA.
             </varlistentry>
 
             <varlistentry>
-                <term><option>--disjoint-union</option></term>
+                <term><option>--union-identical</option></term>
 
-                <listitem><para>
-                    When layering checkouts, error out if a file would be replaced, but add new files and directories
-                </para></listitem>
+                <listitem><para>Like <literal>--union</literal>, but error out
+                if a file would be replaced with a different file. Add new files
+                and directories, ignore identical files, and keep existing
+                directories. Requires <literal>-H</literal>.</para></listitem>
             </varlistentry>
 
             <varlistentry>
index dbdbf058bf9c8750e5829923a62157f52bbcbaba..868cd186f2170a168a8146bace60305af769c512 100644 (file)
@@ -188,8 +188,6 @@ create_file_copy_from_input_at (OstreeRepo     *repo,
                                 GCancellable   *cancellable,
                                 GError        **error)
 {
-  const gboolean union_mode = options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES;
-  const gboolean add_mode = options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES;
   const gboolean sepolicy_enabled = options->sepolicy && !repo->disable_xattrs;
   g_autoptr(GVariant) modified_xattrs = NULL;
 
@@ -218,23 +216,51 @@ create_file_copy_from_input_at (OstreeRepo     *repo,
             return FALSE;
         }
 
-      if (symlinkat (g_file_info_get_symlink_target (file_info),
-                     destination_dfd, destination_name) < 0)
+      const char *target = g_file_info_get_symlink_target (file_info);
+      if (symlinkat (target, destination_dfd, destination_name) < 0)
         {
+          if (errno != EEXIST)
+            return glnx_throw_errno_prefix (error, "symlinkat");
+
           /* Handle union/add behaviors if we get EEXIST */
-          if (errno == EEXIST && union_mode)
+          switch (options->overwrite_mode)
             {
-              /* Unioning?  Let's unlink and try again */
-              (void) unlinkat (destination_dfd, destination_name, 0);
-              if (symlinkat (g_file_info_get_symlink_target (file_info),
-                             destination_dfd, destination_name) < 0)
-                return glnx_throw_errno (error);
+            case OSTREE_REPO_CHECKOUT_OVERWRITE_NONE:
+              return glnx_throw_errno_prefix (error, "symlinkat");
+            case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES:
+              {
+                /* Unioning?  Let's unlink and try again */
+                (void) unlinkat (destination_dfd, destination_name, 0);
+                if (symlinkat (target, destination_dfd, destination_name) < 0)
+                  return glnx_throw_errno_prefix (error, "symlinkat");
+              }
+            case OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES:
+              /* Note early return - we don't want to set the xattrs below */
+              return TRUE;
+            case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL:
+              {
+                /* See the comments for the hardlink version of this
+                 * for why we do this.
+                 */
+                struct stat dest_stbuf;
+                if (!glnx_fstatat (destination_dfd, destination_name, &dest_stbuf,
+                                   AT_SYMLINK_NOFOLLOW, error))
+                  return FALSE;
+                if (S_ISLNK (dest_stbuf.st_mode))
+                  {
+                    g_autofree char *dest_target =
+                      glnx_readlinkat_malloc (destination_dfd, destination_name,
+                                              cancellable, error);
+                    if (!dest_target)
+                      return FALSE;
+                    /* In theory we could also compare xattrs...but eh */
+                    if (g_str_equal (dest_target, target))
+                      return TRUE;
+                  }
+                errno = EEXIST;
+                return glnx_throw_errno_prefix (error, "symlinkat");
+              }
             }
-          else if (errno == EEXIST && add_mode)
-            /* Note early return - we don't want to set the xattrs below */
-            return TRUE;
-          else
-            return glnx_throw_errno (error);
         }
 
       /* Process ownership and xattrs now that we made the link */
@@ -255,7 +281,6 @@ create_file_copy_from_input_at (OstreeRepo     *repo,
   else if (g_file_info_get_file_type (file_info) == G_FILE_TYPE_REGULAR)
     {
       g_auto(GLnxTmpfile) tmpf = { 0, };
-      GLnxLinkTmpfileReplaceMode replace_mode;
 
       if (!glnx_open_tmpfile_linkable_at (destination_dfd, ".", O_WRONLY | O_CLOEXEC,
                                           &tmpf, error))
@@ -278,12 +303,23 @@ create_file_copy_from_input_at (OstreeRepo     *repo,
         return FALSE;
 
       /* The add/union/none behaviors map directly to GLnxLinkTmpfileReplaceMode */
-      if (add_mode)
-        replace_mode = GLNX_LINK_TMPFILE_NOREPLACE_IGNORE_EXIST;
-      else if (union_mode)
-        replace_mode = GLNX_LINK_TMPFILE_REPLACE;
-      else
-        replace_mode = GLNX_LINK_TMPFILE_NOREPLACE;
+      GLnxLinkTmpfileReplaceMode replace_mode;
+      switch (options->overwrite_mode)
+        {
+        case OSTREE_REPO_CHECKOUT_OVERWRITE_NONE:
+          replace_mode = GLNX_LINK_TMPFILE_NOREPLACE;
+          break;
+        case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES:
+          replace_mode = GLNX_LINK_TMPFILE_REPLACE;
+          break;
+        case OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES:
+          replace_mode = GLNX_LINK_TMPFILE_NOREPLACE_IGNORE_EXIST;
+          break;
+        case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL:
+          /* We don't support copying in union identical */
+          g_assert_not_reached ();
+          break;
+        }
 
       if (!glnx_link_tmpfile_at (&tmpf, replace_mode,
                                  destination_dfd, destination_name,
@@ -329,25 +365,61 @@ checkout_file_hardlink (OstreeRepo                          *self,
   else if (allow_noent && errno == ENOENT)
     {
     }
-  else if (errno == EEXIST && options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES)
-    {
-      /* In this mode, we keep existing content.  Distinguish this case though to
-       * avoid inserting into the devino cache.
-       */
-      ret_result = HARDLINK_RESULT_SKIP_EXISTED;
-    }
-  else if (errno == EEXIST && options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES)
+  else if (errno == EEXIST)
     {
-      /* Idiocy, from man rename(2)
-       *
-       * "If oldpath and newpath are existing hard links referring to
-       * the same file, then rename() does nothing, and returns a
-       * success status."
-       *
-       * So we can't make this atomic.
-       */
-      (void) unlinkat (destination_dfd, destination_name, 0);
-      goto again;
+      /* When we get EEXIST, we need to handle the different overwrite modes. */
+      switch (options->overwrite_mode)
+        {
+        case OSTREE_REPO_CHECKOUT_OVERWRITE_NONE:
+          /* Just throw */
+          return glnx_throw_errno_prefix (error, "Hardlinking %s to %s", loose_path, destination_name);
+        case OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES:
+          /* In this mode, we keep existing content.  Distinguish this case though to
+           * avoid inserting into the devino cache.
+           */
+          ret_result = HARDLINK_RESULT_SKIP_EXISTED;
+          break;
+        case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES:
+          {
+            /* Idiocy, from man rename(2)
+             *
+             * "If oldpath and newpath are existing hard links referring to
+             * the same file, then rename() does nothing, and returns a
+             * success status."
+             *
+             * So we can't make this atomic.
+             */
+            (void) unlinkat (destination_dfd, destination_name, 0);
+            goto again;
+          }
+        case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL:
+          {
+            /* In this mode, we error out on EEXIST *unless* the files are already
+             * hardlinked, which is what rpm-ostree wants for package layering.
+             * https://github.com/projectatomic/rpm-ostree/issues/982
+             *
+             * This should be similar to the librpm version:
+             * https://github.com/rpm-software-management/rpm/blob/e3cd2bc85e0578f158d14e6f9624eb955c32543b/lib/rpmfi.c#L921
+             * in rpmfilesCompare().
+             */
+            struct stat src_stbuf;
+            if (!glnx_fstatat (srcfd, loose_path, &src_stbuf,
+                               AT_SYMLINK_NOFOLLOW, error))
+              return FALSE;
+            struct stat dest_stbuf;
+            if (!glnx_fstatat (destination_dfd, destination_name, &dest_stbuf,
+                               AT_SYMLINK_NOFOLLOW, error))
+              return FALSE;
+            const gboolean is_identical =
+              (src_stbuf.st_dev == dest_stbuf.st_dev &&
+               src_stbuf.st_ino == dest_stbuf.st_ino);
+            if (is_identical)
+              ret_result = HARDLINK_RESULT_SKIP_EXISTED;
+            else
+              return glnx_throw_errno_prefix (error, "Hardlinking %s to %s", loose_path, destination_name);
+            break;
+          }
+        }
     }
   else
     {
@@ -652,13 +724,20 @@ checkout_tree_at_recurse (OstreeRepo                        *self,
      */
     if (TEMP_FAILURE_RETRY (mkdirat (destination_parent_fd, destination_name, 0700)) < 0)
       {
-        if (errno == EEXIST &&
-            (options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES
-             || options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES
-             || options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_DISJOINT_UNION_FILES))
-          did_exist = TRUE;
-        else
-          return glnx_throw_errno (error);
+        if (errno != EEXIST)
+          return glnx_throw_errno_prefix (error, "mkdirat");
+
+        switch (options->overwrite_mode)
+          {
+          case OSTREE_REPO_CHECKOUT_OVERWRITE_NONE:
+            return glnx_throw_errno_prefix (error, "mkdirat");
+          /* All of these cases are the same for directories */
+          case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES:
+          case OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES:
+          case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL:
+            did_exist = TRUE;
+            break;
+          }
       }
   }
 
@@ -1002,6 +1081,9 @@ ostree_repo_checkout_at (OstreeRepo                        *self,
 
   g_return_val_if_fail (!(options->force_copy && options->no_copy_fallback), FALSE);
   g_return_val_if_fail (!options->sepolicy || options->force_copy, FALSE);
+  /* union identical requires hardlink mode */
+  g_return_val_if_fail (!(options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL &&
+                          !options->no_copy_fallback), FALSE);
 
   g_autoptr(GFile) commit_root = (GFile*) _ostree_repo_file_new_for_commit (self, commit, error);
   if (!commit_root)
index 58b1a1dc27abd918cc789fbfb1de36cc64c9d6ac..227fe597f85fb8133acdd15f27234d2bd4027e7a 100644 (file)
@@ -832,13 +832,13 @@ typedef enum {
  * @OSTREE_REPO_CHECKOUT_OVERWRITE_NONE: No special options
  * @OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES: When layering checkouts, unlink() and replace existing files, but do not modify existing directories
  * @OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES: Only add new files/directories
- * @OSTREE_REPO_CHECKOUT_OVERWRITE_DISJOINT_UNION_FILES: When layering checkouts, error out if a file would be replaced, but add new files and directories
+ * @OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL: Like UNION_FILES, but error if files are not identical (requires hardlink checkouts)
  */
 typedef enum {
   OSTREE_REPO_CHECKOUT_OVERWRITE_NONE = 0,
   OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES = 1,
   OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES = 2, /* Since: 2017.3 */
-  OSTREE_REPO_CHECKOUT_OVERWRITE_DISJOINT_UNION_FILES = 3 /* Since: 2017.11 */
+  OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL = 3, /* Since: 2017.11 */
 } OstreeRepoCheckoutOverwriteMode;
 
 _OSTREE_PUBLIC
index a3494ae3abc935dfa2f5aca05174deefd841fc45..170d24e90fe13f3d6681af78c149a839747cdb52 100644 (file)
@@ -37,7 +37,7 @@ static gboolean opt_disable_cache;
 static char *opt_subpath;
 static gboolean opt_union;
 static gboolean opt_union_add;
-static gboolean opt_disjoint_union;
+static gboolean opt_union_identical;
 static gboolean opt_whiteouts;
 static gboolean opt_from_stdin;
 static char *opt_from_file;
@@ -73,7 +73,7 @@ static GOptionEntry options[] = {
   { "subpath", 0, 0, G_OPTION_ARG_FILENAME, &opt_subpath, "Checkout sub-directory PATH", "PATH" },
   { "union", 0, 0, G_OPTION_ARG_NONE, &opt_union, "Keep existing directories, overwrite existing files", NULL },
   { "union-add", 0, 0, G_OPTION_ARG_NONE, &opt_union_add, "Keep existing files/directories, only add new", NULL },
-  { "disjoint-union", 0, 0, G_OPTION_ARG_NONE, &opt_disjoint_union, "When layering checkouts, error out if a file would be replaced, but add new files and directories", NULL },
+  { "union-identical", 0, 0, G_OPTION_ARG_NONE, &opt_union_identical, "When layering checkouts, error out if a file would be replaced with a different version, but add new files and directories", NULL },
   { "whiteouts", 0, 0, G_OPTION_ARG_NONE, &opt_whiteouts, "Process 'whiteout' (Docker style) entries", NULL },
   { "allow-noent", 0, 0, G_OPTION_ARG_NONE, &opt_allow_noent, "Do nothing if specified path does not exist", NULL },
   { "from-stdin", 0, 0, G_OPTION_ARG_NONE, &opt_from_stdin, "Process many checkouts from standard input", NULL },
@@ -101,7 +101,7 @@ process_one_checkout (OstreeRepo           *repo,
    * convenient infrastructure for testing C APIs with data.
    */
   if (opt_disable_cache || opt_whiteouts || opt_require_hardlinks ||
-      opt_union_add || opt_force_copy || opt_bareuseronly_dirs || opt_disjoint_union)
+      opt_union_add || opt_force_copy || opt_bareuseronly_dirs || opt_union_identical)
     {
       OstreeRepoCheckoutAtOptions options = { 0, };
 
@@ -114,16 +114,16 @@ process_one_checkout (OstreeRepo           *repo,
                        "Cannot specify both --union and --union-add");
           goto out;
         }
-      if (opt_union && opt_disjoint_union)
+      if (opt_union && opt_union_identical)
         {
           g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
-                       "Cannot specify both --union and --disjoint-union");
+                       "Cannot specify both --union and --union-identical");
           goto out;
         }
-      if (opt_union_add && opt_disjoint_union)
+      if (opt_union_add && opt_union_identical)
         {
           g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
-                       "Cannot specify both --union-add and --disjoint-union ");
+                       "Cannot specify both --union-add and --union-identical ");
           goto out;
         }
       if (opt_require_hardlinks && opt_force_copy)
@@ -135,8 +135,16 @@ process_one_checkout (OstreeRepo           *repo,
         options.overwrite_mode = OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES;
       else if (opt_union_add)
         options.overwrite_mode = OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES;
-      else if (opt_disjoint_union)
-        options.overwrite_mode = OSTREE_REPO_CHECKOUT_OVERWRITE_DISJOINT_UNION_FILES;
+      else if (opt_union_identical)
+        {
+          if (!opt_require_hardlinks)
+            {
+              g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                           "--union-identical requires --require-hardlinks");
+              goto out;
+            }
+          options.overwrite_mode = OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL;
+        }
       if (opt_whiteouts)
         options.process_whiteouts = TRUE;
       if (subpath)
index 6b43ca907e4bf0639db16b08644217c022429c3c..5058af1d2598c8ef4f29e4573211a4f4c0d74faa 100644 (file)
@@ -26,6 +26,7 @@ python -c 'import yaml; yaml.safe_load(open("version.yaml"))'
 echo "ok yaml version"
 
 CHECKOUT_U_ARG=""
+CHECKOUT_H_ARGS="-H"
 COMMIT_ARGS=""
 DIFF_ARGS=""
 if is_bare_user_only_repo repo; then
@@ -36,6 +37,11 @@ if is_bare_user_only_repo repo; then
     DIFF_ARGS="--owner-uid=0 --owner-gid=0 --no-xattrs"
     # Also, since we can't check out uid=0 files we need to check out in user mode
     CHECKOUT_U_ARG="-U"
+    CHECKOUT_H_ARGS="-U -H"
+else
+    if grep -E -q '^mode=bare-user' repo/config; then
+        CHECKOUT_H_ARGS="-U -H"
+    fi
 fi
 
 validate_checkout_basic() {
@@ -469,31 +475,56 @@ assert_file_has_content checkout-test-union-add/union-add-test 'existing file fo
 assert_file_has_content checkout-test-union-add/union-add-test2 'another file for union add testing'
 echo "ok checkout union add"
 
-# Create some new files for testing
-cd ${test_tmpdir}
-mkdir disjoint-union-test
-mkdir disjoint-union-test/test_one
-chmod a+w disjoint-union-test/test_one
-echo 'file for add dirs testing' > disjoint-union-test/test_one/test_file
-$OSTREE commit ${COMMIT_ARGS} -b test-disjoint-union --tree=dir=disjoint-union-test
-$OSTREE checkout --disjoint-union test-disjoint-union checkout-test-disjoint-union
-echo "ok adding new directories and new file"
-# Make a new file, and try the checkout again
-echo 'second test file' >  disjoint-union-test/test_one/test_second_file
-$OSTREE commit ${COMMIT_ARGS} -b test-disjoint-union --tree=dir=disjoint-union-test
-# Check out the latest commit, should fail due to presence of existing files
-if $OSTREE checkout --disjoint-union test-disjoint-union checkout-test-disjoint-union 2> err.txt; then
-    assert_not_reached "checking out files unexpectedly succeeded!"
+# Test --union-identical <https://github.com/projectatomic/rpm-ostree/issues/982>
+# Prepare data:
+cd ${test_tmpdir}
+for x in $(seq 3); do
+    mkdir -p pkg${x}/usr/{bin,share/licenses}
+    # Separate binaries and symlinks
+    echo 'binary for pkg'${x} > pkg${x}/usr/bin/pkg${x}
+    ln -s pkg${x} pkg${x}/usr/bin/link${x}
+    # But they share the GPL
+    echo 'this is the GPL' > pkg${x}/usr/share/licenses/COPYING
+    ln -s COPYING pkg${x}/usr/share/licenses/LICENSE
+    $OSTREE commit -b union-identical-pkg${x} --tree=dir=pkg${x}
+done
+rm union-identical-test -rf
+for x in $(seq 3); do
+    $OSTREE checkout ${CHECKOUT_H_ARGS} --union-identical union-identical-pkg${x} union-identical-test
+done
+if $OSTREE checkout ${CHECKOUT_H_ARGS/-H/} --union-identical union-identical-pkg${x} union-identical-test-tmp 2>err.txt; then
+    fatal "--union-identical without -H"
 fi
-assert_file_has_content err.txt 'File exists'
-# Verify that Union mode still functions properly
-rm checkout-test-disjoint-union/test_one/test_file
-echo 'file for testing union mode alongwith disjoint-union mode' > checkout-test-disjoint-union/test_one/test_file
-$OSTREE checkout --union test-disjoint-union checkout-test-disjoint-union
-assert_has_file checkout-test-disjoint-union/test_one/test_second_file
-# This shows the file with same name has been successfully overwriten
-assert_file_has_content checkout-test-disjoint-union/test_one/test_file 'file for add dirs testing'
-echo "ok checkout disjoint union"
+assert_file_has_content err.txt "error:.*--union-identical requires --require-hardlinks"
+for x in $(seq 3); do
+    for v in pkg link; do
+        assert_file_has_content union-identical-test/usr/bin/${v}${x} "binary for pkg"${x}
+    done
+    for v in COPYING LICENSE; do
+        assert_file_has_content union-identical-test/usr/share/licenses/${v} GPL
+    done
+done
+echo "ok checkout union identical merges"
+
+# Make conflicting packages, one with regfile, one with symlink
+mkdir -p pkg-conflict1bin/usr/{bin,share/licenses}
+echo 'binary for pkg-conflict1bin' > pkg-conflict1bin/usr/bin/pkg1
+echo 'this is the GPL' > pkg-conflict1bin/usr/share/licenses/COPYING
+$OSTREE commit -b union-identical-conflictpkg1bin --tree=dir=pkg-conflict1bin
+mkdir -p pkg-conflict1link/usr/{bin,share/licenses}
+ln -s somewhere-else > pkg-conflict1link/usr/bin/pkg1
+echo 'this is the GPL' > pkg-conflict1link/usr/share/licenses/COPYING
+$OSTREE commit -b union-identical-conflictpkg1link --tree=dir=pkg-conflict1link
+
+for v in bin link; do
+    rm union-identical-test -rf
+    $OSTREE checkout ${CHECKOUT_H_ARGS} --union-identical union-identical-pkg1 union-identical-test
+    if $OSTREE checkout ${CHECKOUT_H_ARGS} --union-identical union-identical-conflictpkg1${v} union-identical-test 2>err.txt; then
+        fatal "union identical $v succeeded?"
+    fi
+    assert_file_has_content err.txt 'error:.*File exists'
+done
+echo "ok checkout union identical conflicts"
 
 cd ${test_tmpdir}
 rm files -rf && mkdir files