import: Sped up import from special remote when the imported tree is unchanged
author Joey Hess <joeyh@joeyh.name>
Tue, 2 Jan 2024 17:56:50 +0000 (13:56 -0400)
committer Joey Hess <joeyh@joeyh.name>
Tue, 2 Jan 2024 17:57:31 +0000 (13:57 -0400)
I saw a nearly 2 minute speed up from this, in a repo with 56000 files some
of which are preferred content of the special remote and others not. In
such a case, addBackExportExcluded has to do a lot of work, which is
unnecessary when the tree is unchanged.

When using sync --content, preferred content checking of that many files
takes about 1 minute. So this speeds up sync --content by 3x.
When using git-annex import, the speed up is much larger.

Sponsored-by: Nicholas Golder-Manning on Patreon
Annex/Import.hs
CHANGELOG

index 023d7df1a90385430e1e12e25813e555c4cda1aa..5c9ac2eaaff546e2f6c7650e5767e35ccc68e6e3 100644 (file)
@@ -223,20 +223,27 @@ buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History t
                -- nothing new needs to be committed.
                -- (This is unlikely to happen.)
                | sametodepth h' = return Nothing
-               | otherwise = do
-                       importedcommit <- case getRemoteTrackingBranchImportHistory h of
-                               Nothing -> mkcommitsunconnected imported
-                               Just oldimported@(History oldhc _)
-                                       | importeddepth == 1 ->
-                                               mkcommitconnected imported oldimported
-                                       | otherwise -> do
-                                               let oldimportedtrees = mapHistory historyCommitTree oldimported
-                                               mknewcommits oldhc oldimportedtrees imported
-                       ti' <- addBackExportExcluded remote ti
-                       Just <$> makeRemoteTrackingBranchMergeCommit'
-                               trackingcommit importedcommit ti'
+               -- If the imported tree is unchanged,
+               -- nothing new needs to be committed.
+               | otherwise = getLastImportedTree remote >>= \case
+                       Just (LastImportedTree lasttree)
+                               | lasttree == ti -> return Nothing
+                       _ -> gencommit trackingcommit h
          where
                h'@(History t s) = mapHistory historyCommitTree h
+       
+       gencommit trackingcommit h = do
+               importedcommit <- case getRemoteTrackingBranchImportHistory h of
+                       Nothing -> mkcommitsunconnected imported
+                       Just oldimported@(History oldhc _)
+                               | importeddepth == 1 ->
+                                       mkcommitconnected imported oldimported
+                               | otherwise -> do
+                                       let oldimportedtrees = mapHistory historyCommitTree oldimported
+                                       mknewcommits oldhc oldimportedtrees imported
+               ti' <- addBackExportExcluded remote ti
+               Just <$> makeRemoteTrackingBranchMergeCommit'
+                       trackingcommit importedcommit ti'
 
        importeddepth = historyDepth imported
 
index bb874441e561b27c454c6bdbf090fc37ce5ee32e..c632bef4fbdb12c44ac6a2f5f4083493472a5339 100644 (file)
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
 git-annex (10.20231228) UNRELEASED; urgency=medium
 
   * info: Added "annex sizes of repositories" table to the overall display.
+  * import: Sped up import from special remote when the imported tree is
+    unchanged.
 
  -- Joey Hess <id@joeyh.name>  Fri, 29 Dec 2023 11:52:06 -0400