handle comutations in subdirs of the git repository
authorJoey Hess <joeyh@joeyh.name>
Tue, 25 Feb 2025 19:08:38 +0000 (15:08 -0400)
committerJoey Hess <joeyh@joeyh.name>
Tue, 25 Feb 2025 19:08:38 +0000 (15:08 -0400)
Eg, a computation might be run in "foo/" and refer to "../bar" as an
input or output.

So, the subdir is part of the computation state.

Also, prevent input or output of files that are outside the git
repository. Of course, the program can access any file on disk if it
wants to; this is just a guard against mistakes. And it may also be
useful if the program comunicates with something less trusted than it,
eg a container image, so input/output files communicated by that are not
the source of security problems.

Remote/Compute.hs
doc/design/compute_special_remote_interface.mdwn
doc/git-annex-addcomputed.mdwn
doc/git-annex-recompute.mdwn

index 08b7f385d455f980c65430ecbd5552d641f65ea4..06017e6365b77d3e795766d3a6206e377aa49757 100644 (file)
@@ -12,8 +12,10 @@ module Remote.Compute (
        ComputeState(..),
        setComputeState,
        getComputeStates,
+       ComputeProgram,
        getComputeProgram,
        runComputeProgram,
+       ImmutableState(..),
 ) where
 
 import Annex.Common
@@ -33,6 +35,7 @@ import Logs.MetaData
 import Utility.Metered
 import Utility.TimeStamp
 import Utility.Env
+import Utility.Tmp.Dir
 import qualified Git
 import qualified Utility.SimpleProtocol as Proto
 
@@ -166,8 +169,9 @@ instance Proto.Serializable PercentFloat where
 
 data ComputeState = ComputeState
        { computeParams :: [String]
-       , computeInputs :: M.Map FilePath Key
-       , computeOutputs :: M.Map FilePath (Maybe Key)
+       , computeInputs :: M.Map OsPath Key
+       , computeOutputs :: M.Map OsPath (Maybe Key)
+       , computeSubdir :: OsPath
        , computeReproducible :: Bool
        }
        deriving (Show, Eq)
@@ -175,12 +179,12 @@ data ComputeState = ComputeState
 {- Formats a ComputeState as an URL query string.
  -
  - Prefixes computeParams with 'p', computeInputs with 'i',
- - and computeOutput with 'o'.
+ - and computeOutput with 'o'. Uses "d" for computeSubdir.
  -
  - When the passed Key is an output, rather than duplicate it
  - in the query string, that output has no value.
  -
- - Example: "psomefile&pdestfile&pbaz&isomefile=WORM--foo&odestfile="
+ - Example: "psomefile&pdestfile&pbaz&isomefile=WORM--foo&odestfile=&d=subdir"
  -
  - The computeParams are in the order they were given. The computeInputs
  - and computeOutputs are sorted in ascending order for stability.
@@ -190,13 +194,14 @@ formatComputeState k st = renderQuery False $ concat
        [ map formatparam (computeParams st)
        , map formatinput (M.toAscList (computeInputs st))
        , mapMaybe formatoutput (M.toAscList (computeOutputs st))
+       , [("d", Just (fromOsPath (computeSubdir st)))]
        ]
   where
        formatparam p = ("p" <> encodeBS p, Nothing)
        formatinput (file, key) =
-               ("i" <> toRawFilePath file, Just (serializeKey' key))
+               ("i" <> fromOsPath file, Just (serializeKey' key))
        formatoutput (file, (Just key)) = Just $
-               ("o" <> toRawFilePath file,
+               ("o" <> fromOsPath file,
                        if key == k
                                then Nothing
                                else Just (serializeKey' key)
@@ -208,7 +213,7 @@ parseComputeState k b =
        let st = go emptycomputestate (parseQuery b)
        in if st == emptycomputestate then Nothing else Just st
   where
-       emptycomputestate = ComputeState mempty mempty mempty False
+       emptycomputestate = ComputeState mempty mempty mempty "." False
        go :: ComputeState -> [QueryItem] -> ComputeState
        go c [] = c { computeParams = reverse (computeParams c) }
        go c ((f, v):rest) = 
@@ -220,7 +225,7 @@ parseComputeState k b =
                                key <- deserializeKey' =<< v
                                Just $ c
                                        { computeInputs = 
-                                               M.insert i key
+                                               M.insert (toOsPath i) key
                                                        (computeInputs c)
                                        }
                        ('o':o) -> case v of
@@ -228,14 +233,21 @@ parseComputeState k b =
                                        key <- deserializeKey' kv
                                        Just $ c
                                                { computeOutputs =
-                                                       M.insert o (Just key)
+                                                       M.insert (toOsPath o)
+                                                               (Just key)
                                                                (computeOutputs c)
                                                }
                                Nothing -> Just $ c
                                        { computeOutputs = 
-                                               M.insert o (Just k)
+                                               M.insert (toOsPath o)
+                                                       (Just k)
                                                        (computeOutputs c)
                                        }
+                       ('d':[]) -> do
+                               subdir <- v
+                               Just $ c
+                                       { computeSubdir = toOsPath subdir
+                                       }
                        _ -> Nothing
                in go c' rest
 
@@ -288,14 +300,14 @@ runComputeProgram
        -> (ComputeState -> OsPath -> Annex v)
        -> Annex v
 runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont =
-       withOtherTmp $ \tmpdir -> 
-               go tmpdir 
-                       `finally` liftIO (removeDirectoryRecursive tmpdir)
+       withOtherTmp $ \othertmpdir ->
+               withTmpDirIn othertmpdir "compute" go
   where
        go tmpdir = do
                environ <- computeProgramEnvironment state
+               subdir <- liftIO $ getsubdir tmpdir
                let pr = (proc program (computeParams state))
-                        { cwd = Just (fromOsPath tmpdir)
+                        { cwd = Just (fromOsPath subdir)
                         , std_in = CreatePipe
                         , std_out = CreatePipe
                         , env = Just environ
@@ -303,16 +315,26 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
                state' <- bracket
                        (liftIO $ createProcess pr)
                        (liftIO . cleanupProcess)
-                       (getinput state tmpdir)
-               cont state' tmpdir
+                       (getinput state tmpdir subdir)
+               cont state' subdir
+               
+       getsubdir tmpdir = do
+               let subdir = tmpdir </> computeSubdir state
+               ifM (dirContains <$> absPath tmpdir <*> absPath subdir)
+                       ( do
+                               createDirectoryIfMissing True subdir
+                               return subdir
+                       -- Ignore unsafe value in state.
+                       , return tmpdir
+                       )
        
-       getinput state' tmpdir p = 
+       getinput state' tmpdir subdir p = 
                liftIO (hGetLineUntilExitOrEOF (processHandle p) (stdoutHandle p)) >>= \case
                        Just l
-                               | null l -> getinput state' tmpdir p
+                               | null l -> getinput state' tmpdir subdir p
                                | otherwise -> do
-                                       state'' <- parseoutput p state' l
-                                       getinput state'' tmpdir p
+                                       state'' <- parseoutput p tmpdir subdir state' l
+                                       getinput state'' tmpdir subdir p
                        Nothing -> do
                                liftIO $ hClose (stdoutHandle p)
                                liftIO $ hClose (stdinHandle p)
@@ -320,28 +342,36 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
                                        giveup $ program ++ " exited unsuccessfully"
                                return state'
        
-       parseoutput p state' l = case Proto.parseMessage l of
-               Just (ProcessInput f) ->
-                       let knowninput = M.member f (computeInputs state')
-                       in checkimmutable knowninput l $ do
-                               (k, mp) <- getinputcontent (toOsPath f)
+       parseoutput p tmpdir subdir state' l = case Proto.parseMessage l of
+               Just (ProcessInput f) -> do
+                       let f' = toOsPath f
+                       let knowninput = M.member f' (computeInputs state')
+                       checksafefile tmpdir subdir f' "input"
+                       checkimmutable knowninput l $ do
+                               (k, mp) <- getinputcontent f'
+                               mp' <- liftIO $ maybe (pure Nothing)
+                                       (Just <$$> relPathDirToFile subdir)
+                                       mp
                                liftIO $ hPutStrLn (stdinHandle p) $
-                                       maybe "" fromOsPath mp
+                                       maybe "" fromOsPath mp'
+                               liftIO $ hFlush (stdinHandle p)
                                return $ if knowninput
                                        then state'
                                        else state'
                                                { computeInputs = 
-                                                       M.insert f k
+                                                       M.insert f' k
                                                                (computeInputs state')
                                                }
-               Just (ProcessOutput f) ->
-                       let knownoutput = M.member f (computeOutputs state')
-                       in checkimmutable knownoutput l $ 
+               Just (ProcessOutput f) -> do
+                       let f' = toOsPath f
+                       checksafefile tmpdir subdir f' "output"
+                       let knownoutput = M.member f' (computeOutputs state')
+                       checkimmutable knownoutput l $ 
                                return $ if knownoutput
                                        then state'
                                        else state'
                                                { computeOutputs = 
-                                                       M.insert f Nothing
+                                                       M.insert f' Nothing
                                                                (computeOutputs state')
                                                }
                Just (ProcessProgress percent) -> do
@@ -352,6 +382,14 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
                Nothing -> giveup $
                        program ++ " output included an unparseable line: \"" ++ l ++ "\""
 
+       checksafefile tmpdir subdir f fileaction = do
+               let err problem = giveup $
+                       program ++ " tried to " ++ fileaction ++ " a file that is " ++ problem ++ ": " ++ fromOsPath f
+               unlessM (liftIO $ dirContains <$> absPath tmpdir <*> absPath (subdir </> f)) $
+                       err "outside the git repository"
+               when (any (\p -> dropTrailingPathSeparator p == literalOsPath ".git") (splitPath f)) $
+                       err "inside the .git directory"
+
        checkimmutable True _ a = a
        checkimmutable False l a
                | not immutablestate = a
@@ -385,7 +423,7 @@ computeKey rs (ComputeProgram program) k af dest p vc = do
                        [] -> Nothing
 
        go keyfile state tmpdir = do
-               let keyfile' = tmpdir </> toOsPath keyfile
+               let keyfile' = tmpdir </> keyfile
                unlessM (liftIO $ doesFileExist keyfile') $
                        giveup $ program ++ " exited sucessfully, but failed to write the computed file"
                catchNonAsync (liftIO $ moveFile keyfile' dest)
@@ -395,7 +433,7 @@ computeKey rs (ComputeProgram program) k af dest p vc = do
                forM_ (M.toList $ computeOutputs state) $ \case
                        (file, (Just key)) ->
                                when (k /= key) $ do
-                                       let file' = tmpdir </> toOsPath file
+                                       let file' = tmpdir </> file
                                        whenM (liftIO $ doesFileExist file') $
                                                whenM (verifyKeyContentPostRetrieval RetrievalAllKeysSecure vc verification k file') $
                                                        void $ tryNonAsync $ moveAnnex k file'
index 5c771c17ad0262982416deac9a5cc5822a0a57d8..34b7da7e77b8f345abf0cfd9de187f93f95c2d2b 100644 (file)
@@ -26,7 +26,9 @@ For security, the program should avoid exposing user input to the shell
 unprotected, or otherwise executing it.
 
 The program is run in a temporary directory, which will be cleaned up after
-it exits.
+it exits. Note that it may be run in a subdirectory of its temporary
+directory. Eg, when `git-annex addcomputed` is run in a `foo/bar/`
+subdirectory of the git repository.
 
 The content of any annexed file in the repository can be an input
 to the computation. The program requests an input by writing a line to
@@ -93,10 +95,10 @@ An example `git-annex-compute-foo` shell script follows:
         echo "Usage: convert input output [passes=n]" >&2
         exit 1
     fi
-    if [ -z "$ANNEX_COMPUTE_passes" ];
+    if [ -z "$ANNEX_COMPUTE_passes" ]; then
         ANNEX_COMPUTE_passes=1
     fi
-    echo "INPUT "$2"
+    echo "INPUT $2"
     read input
     echo "OUTPUT $3"
     echo REPRODUCIBLE
index 487bb70ff156b1e7ec413a7547d06a30aab757cb..bca6e1144d4af318cc9c96d709d534cebf20bff5 100644 (file)
@@ -33,6 +33,8 @@ Some examples of how this might look:
     git-annex addcomputed --to=y -- compress foo --level=9
     git-annex addcomputed --to=z -- clip foo 2:01-3:00 combine with bar to baz
 
+
+
 # OPTIONS
 
 * `--to=remote`
@@ -54,7 +56,7 @@ Some examples of how this might look:
   Adds computed files to the repository, without generating their content
   yet. 
 
-* `--unreproducible`
+* `--unreproducible`, `-u`
 
   Indicate that the computation is not expected to be fully reproducible.
   It can vary, in ways that produce files that equivilant enough to
index c86173c2eba1dd96a86d73af34265cf1f50c64c3..2800a74106f634d13ce407f5c5b456ea83b920cf 100644 (file)
@@ -22,12 +22,12 @@ in the repository.
 
   Recompute files even when their input files have not changed.
 
-* `--unreproducible`
+* `--unreproducible`, `-u`
 
   Convert files that were added with `git-annex addcomputed --reproducible`
   to be as if they were added without that option.
 
-* `--reproducible`
+* `--reproducible`, `-r`
 
   Convert files that were added with `git-annex addcomputed --unreproducible`
   to be as if they were added with `--reproducible`.