From: Joey Hess Date: Fri, 29 Dec 2023 16:09:30 +0000 (-0400) Subject: info: Added "annex sizes of repositories" table to the overall display X-Git-Tag: archive/raspbian/10.20250416-2+rpi1~1^2~29^2~71 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=a4a5ec636690256563907693477f1bbbd9596090;p=git-annex.git info: Added "annex sizes of repositories" table to the overall display Thanks to previous work in 11cc9f19339b5fa80f238e42407eb9ed87cf8855, this is almost entirely free, it only needs to do some additional map lookups and math. The strictness annotations keep the memory use from blowing up. Sponsored-by: unqueued on Patreon --- diff --git a/CHANGELOG b/CHANGELOG index c990a6b85e..bb874441e5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +git-annex (10.20231228) UNRELEASED; urgency=medium + + * info: Added "annex sizes of repositories" table to the overall display. + + -- Joey Hess Fri, 29 Dec 2023 11:52:06 -0400 + git-annex (10.20231227) upstream; urgency=medium * migrate: Support distributed migrations by recording each migration, diff --git a/Command/Info.hs b/Command/Info.hs index 864ad02579..6ec76f5b84 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -283,6 +283,7 @@ global_slow_stats = , known_annex_files True , known_annex_size True , total_annex_size + , reposizes_stats_global , backend_usage , bloom_info ] @@ -298,7 +299,7 @@ tree_fast_stats isworktree = tree_slow_stats :: [FilePath -> Stat] tree_slow_stats = [ const numcopies_stats - , const reposizes_stats + , const reposizes_stats_tree , const reposizes_total ] @@ -372,6 +373,10 @@ countRepoList :: Int -> String -> String countRepoList _ [] = "0" countRepoList n s = show n ++ "\n" ++ beginning s +dispRepoList :: String -> String +dispRepoList [] = "" +dispRepoList s = "\n" ++ beginning s + dir_name :: FilePath -> Stat dir_name dir = simpleStat "directory" $ pure dir @@ -441,7 +446,8 @@ known_annex_size isworktree = total_annex_size :: Stat total_annex_size = simpleStat "combined annex size of all repositories" $ - showSizeKeys =<< cachedAllRepoData + showSizeKeys . fromMaybe mempty . allRepoData + =<< cachedAllRepoData treeDesc :: Bool -> String treeDesc True = "working tree" @@ -545,21 +551,29 @@ numcopies_stats = stat "numcopies stats" $ json fmt $ . map (\(variance, count) -> "numcopies " ++ variance ++ ": " ++ show count) . V.toList -reposizes_stats :: Stat -reposizes_stats = stat desc $ nojson $ do +reposizes_stats_tree :: Stat +reposizes_stats_tree = reposizes_stats True "repositories containing these files" + =<< cachedRepoData + +reposizes_stats_global :: Stat +reposizes_stats_global = reposizes_stats False "annex sizes of repositories" + . repoData =<< cachedAllRepoData + +reposizes_stats :: Bool -> String -> M.Map UUID KeyInfo -> Stat +reposizes_stats count desc m = stat desc $ nojson $ do sizer <- mkSizer - l <- map (\(u, kd) -> (u, sizer storageUnits True (sizeKeys kd))) - . sortBy (flip (comparing (sizeKeys . snd))) - . M.toList - <$> cachedRepoData + let l = map (\(u, kd) -> (u, sizer storageUnits True (sizeKeys kd))) $ + sortBy (flip (comparing (sizeKeys . snd))) $ + M.toList m let maxlen = maximum (map (length . snd) l) descm <- lift Remote.uuidDescriptions -- This also handles json display. s <- lift $ Remote.prettyPrintUUIDsWith (Just "size") desc descm (Just . show) $ map (\(u, sz) -> (u, Just $ mkdisp sz maxlen)) l - return $ countRepoList (length l) s + return $ if count + then countRepoList (length l) s + else dispRepoList s where - desc = "repositories containing these files" mkdisp sz maxlen = DualDisp { dispNormal = lpad maxlen sz , dispJson = sz @@ -619,29 +633,34 @@ cachedReferencedData = do put s { referencedData = Just v } return v -cachedAllRepoData :: StatState KeyInfo +cachedAllRepoData :: StatState StatInfo cachedAllRepoData = do s <- get case allRepoData s of - Just v -> return v + Just _ -> return s Nothing -> do matcher <- lift getKeyOnlyMatcher - !v <- lift $ overLocationLogs emptyKeyInfo $ \k locs d -> do + !(d, rd) <- lift $ overLocationLogs (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do ifM (matchOnKey matcher k) ( do - numcopies <- genericLength . snd + alivelocs <- snd <$> trustPartition DeadTrusted locs - return (addKeyCopies numcopies k d) - , return d + let !d' = addKeyCopies (genericLength alivelocs) k d + let !rd' = foldl' (flip (accumrepodata k)) rd alivelocs + return (d', rd') + , return (d, rd) ) - put s { allRepoData = Just v } - return v + let s' = s { allRepoData = Just d, repoData = rd } + put s' + return s' + where + accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo) --- currently only available for directory info +-- only available for directory info, populated by earlier getDirStatInfo cachedNumCopiesStats :: StatState (Maybe NumCopiesStats) cachedNumCopiesStats = numCopiesStats <$> get --- currently only available for directory info +-- only available for directory info, populated by earlier getDirStatInfo cachedRepoData :: StatState (M.Map UUID KeyInfo) cachedRepoData = repoData <$> get diff --git a/doc/git-annex-info.mdwn b/doc/git-annex-info.mdwn index bdbcf1415e..9b0e9715df 100644 --- a/doc/git-annex-info.mdwn +++ b/doc/git-annex-info.mdwn @@ -12,8 +12,9 @@ Displays statistics and other information for the specified item. When no item is specified, displays overall information. This includes a list of all known repositories, how much annexed data is present in the -local repository, and the total size of all annexed data in the working -tree. +local repository, the total size of all annexed data in the working +tree, the combined size of annexed data in all repositories, and the annex +sizes of each repository. When a directory is specified, displays information about the annexed files in that directory (and subdirectories). diff --git a/doc/todo/info_show_total_annex_sizes_of_repositories.mdwn b/doc/todo/info_show_total_annex_sizes_of_repositories.mdwn index ce48edc537..d96e7e9b51 100644 --- a/doc/todo/info_show_total_annex_sizes_of_repositories.mdwn +++ b/doc/todo/info_show_total_annex_sizes_of_repositories.mdwn @@ -6,3 +6,5 @@ I think that this recently became possible to inplement cheaply. cachedAllRepoData is collected now, used for "combined annex size of all repositories". For this, it also needs to accumulate the size of each separate repository. --[[Joey]] + +> [[done]] --[[Joey]]