And implemented SANDBOX, which it needs.
Copyright: © 2013 Joey Hess <id@joeyh.name>
License: GPL-3+
-Files: doc/special_remotes/compute/git-annex-compute-imageconvert doc/special_remotes/compute/git-annex-compute-wasmedge
+Files: doc/special_remotes/compute/git-annex-compute-imageconvert doc/special_remotes/compute/git-annex-compute-wasmedge doc/special_remotes/compute/git-annex-compute-singularity
Copyright: © 2025 Joey Hess <id@joeyh.name>
License: GPL-3+
import Utility.Tmp.Dir
import Utility.Url
import Utility.MonotonicClock
+import Utility.CopyFile
import Types.Key
import Backend
import qualified Git
= ProcessInput FilePath
| ProcessOutput FilePath
| ProcessReproducible
+ | ProcessSandbox
| ProcessProgress PercentFloat
deriving (Show, Eq)
parseCommand "INPUT" = Proto.parse1 ProcessInput
parseCommand "OUTPUT" = Proto.parse1 ProcessOutput
parseCommand "REPRODUCIBLE" = Proto.parse0 ProcessReproducible
+ parseCommand "SANDBOX" = Proto.parse0 ProcessSandbox
parseCommand "PROGRESS" = Proto.parse1 ProcessProgress
parseCommand _ = Proto.parseFail
{ computeState :: ComputeState
, computeInputsUnavailable :: Bool
, computeReproducible :: Bool
+ , computeSandbox :: Bool
}
runComputeProgram
}
showOutput
starttime <- liftIO currentMonotonicTimestamp
- let startresult = ComputeProgramResult state False False
+ let startresult = ComputeProgramResult state False False False
result <- withmeterfile $ \meterfile -> bracket
(liftIO $ createProcess pr)
(liftIO . cleanupProcess)
checksafefile tmpdir subdir f' "input"
checkimmutable knowninput "inputting" f' $ do
(k, inputcontent) <- getinputcontent f'
+ let mkrel a = Just <$>
+ (a >>= liftIO . relPathDirToFile subdir)
mp <- case inputcontent of
Nothing -> pure Nothing
- Just (Right f'') -> liftIO $
- Just <$> relPathDirToFile subdir f''
- Just (Left gitsha) ->
- Just <$> (liftIO . relPathDirToFile subdir
- =<< populategitsha gitsha tmpdir)
+ Just (Right obj)
+ | computeSandbox result ->
+ mkrel $ populatesandbox obj tmpdir
+ | otherwise ->
+ mkrel $ pure obj
+ Just (Left gitsha) ->
+ mkrel $ populategitsha gitsha tmpdir
sendresponse p $
maybe "" fromOsPath mp
let result' = result
return result
Just ProcessReproducible ->
return $ result { computeReproducible = True }
+ Just ProcessSandbox -> do
+ sandboxpath <- liftIO $ fromOsPath <$>
+ relPathDirToFile subdir tmpdir
+ sendresponse p $
+ if null sandboxpath
+ then "."
+ else sandboxpath
+ return $ result { computeSandbox = True }
Nothing -> giveup $
program ++ " output an unparseable line: \"" ++ l ++ "\""
-- to the program as a parameter, which could parse it as a dashed
-- option or other special parameter.
populategitsha gitsha tmpdir = do
- let f = tmpdir </> literalOsPath ".git" </> literalOsPath "objects"
+ let f = tmpdir </> literalOsPath ".git"
+ </> literalOsPath "objects"
</> toOsPath (Git.fromRef' gitsha)
liftIO $ createDirectoryIfMissing True $ takeDirectory f
liftIO . F.writeFile f =<< catObject gitsha
return f
+ populatesandbox annexobj tmpdir = do
+ let f = tmpdir </> literalOsPath ".git"
+ </> literalOsPath "annex"
+ </> literalOsPath "objects"
+ </> takeFileName annexobj
+ liftIO $ createDirectoryIfMissing True $ takeDirectory f
+ liftIO $ unlessM (createLinkOrCopy annexobj f) $
+ giveup "Unable to populate compute sandbox directory"
+ return f
+
withmeterfile a = case meterkey of
Nothing -> a (const noop)
Just (_, progress) -> do
bit-for-bit reproducible. That makes `git-annex addcomputed` behave as if
the `--reproducible` option is set.
+The program can also output a "SANDBOX" line, and then read a line from
+stdin that will be the path to the directory it should sandbox to (which
+corresponds to the top of the git repository, so may be above its working
+directory). Any "INPUT" lines that come after "SANDBOX" will have input
+files be provided via paths that are inside the sandbox directory. Usually
+that is done by making hard links, but it will fall back to copying annexed
+files if the filesystem does not support hard links.
+
Anything that the program outputs to stderr will be displayed to the user.
This stderr should be used for error messages, and possibly computation
output, but not for progress displays.
`git-annex addcomputed --to=imageconvert foo.jpeg foo.gif`
+* [[compute/git-annex-compute-singularity]]
+ Uses [Singularity](https://sylabs.io/) to run a container, which is
+ checked into the git-annex repository, to compute other files in the
+ repository. Among other things, this can run other compute programs
+ inside a singularity container.
+ [[Examples here|compute/git-annex-compute-singularity-examples]]
+
* [[compute/git-annex-compute-wasmedge]]
Uses [WasmEdge](https://WasmEdge.org/) to run WASM programs that are
checked into the git-annex repository, to compute other files in the
--- /dev/null
+#!/bin/bash
+# git-annex compute remote program that runs singularity containers
+# from the git-annex repository.
+#
+# Copyright 2025 Joey Hess; licenced under the GNU GPL version 3 or higher.
+set -e
+
+# At least one parameter is required; without it there is nothing to run.
+if [ -z "$1" ]; then
+	echo "Usage: container [singularity options] [inputs] -- [outputs] -- [command params]" >&2
+	exit 1
+fi
+
+# Defaults, adjusted later while the parameters are processed.
+# Both the bind directory and the run directory start out as the
+# current working directory.
+nocompat_opt=""
+fakeroot_opt=""
+container=""
+binddir="$(pwd)"
+rundir="$binddir"
+
+run_singularity () {
+	# Runs the default command of $container, passing "$@" to it.
+	#
+	# Network access is disabled (with --net --network=none), to
+	# prevent an untrusted singularity image from phoning home and/or
+	# attacking the local network.
+	#
+	# --oci is used to get process namespacing
+	local -a isolation=(--net --network=none --oci)
+	local -a dirs=(--bind="$binddir" --pwd="$rundir")
+	# The two optional flags are deliberately left unquoted, so that
+	# an unset one expands to no argument at all.
+	singularity run "${isolation[@]}" "${dirs[@]}" \
+		$nocompat_opt $fakeroot_opt \
+		"$container" "$@"
+}
+
+# Filter stdin to stdout, removing every ESC (0x1B) byte, to avoid any
+# security problems with harmful terminal escape sequences.
+strip_escape () {
+	local esc=$'\033'
+	sed "s/[${esc}]//g"
+}
+
+if [ -z "$ANNEX_COMPUTE_passthrough" ]; then
+	# Arbitrary command mode. The parameters are
+	#   [singularity options] [inputs] -- [outputs] -- [command params]
+	# and $stage tracks which of the three sections is being processed.
+	stage=1
+	while [ -n "$1" ]; do
+		if [ "$1" = "--" ]; then
+			stage=$((stage+1))
+			shift 1
+		else
+			if [ "$stage" = 1 ]; then
+				case "$1" in
+					"--no-compat")
+						nocompat_opt="--no-compat"
+					;;
+					"--fakeroot")
+						fakeroot_opt="--fakeroot"
+					;;
+					*)
+						# Ask git-annex for the input file, and read
+						# back the path to its content. The reply is
+						# read verbatim (-r, empty IFS) so backslashes
+						# and surrounding whitespace in the path
+						# survive. An empty reply means no content
+						# was provided.
+						echo "INPUT $1"
+						IFS= read -r input
+						if [ -n "$input" ]; then
+							p="./$1"
+							mkdir -p "$(dirname "$p")"
+							ln "$(realpath "$input")" "$p"
+							# The first input is the container image.
+							if [ -z "$container" ]; then
+								container="$p"
+							fi
+						fi
+					;;
+				esac
+				shift 1
+			elif [ "$stage" = 2 ]; then
+				# Declare the output file; the reply line is
+				# consumed but not otherwise used.
+				echo "OUTPUT $1"
+				IFS= read -r output
+				shift 1
+			else
+				# Everything left is the command to run.
+				break
+			fi
+		fi
+	done
+	# Without pipefail, the exit status of the pipeline would be that
+	# of strip_escape, and a failed computation would be reported to
+	# git-annex as a success.
+	set -o pipefail
+	run_singularity "$@" </dev/null 2>&1 | strip_escape >&2
+else
+	# Tell git-annex that the program will be running sandboxed,
+	# it will tell us where the top of the sandbox is, and that's the
+	# directory to bind into singularity.
+	echo "SANDBOX"
+	IFS= read -r pathtotop
+	binddir="$(realpath "$pathtotop")"
+	# The passthrough container is named relative to the top of the
+	# repository.
+	echo "INPUT $pathtotop/$ANNEX_COMPUTE_passthrough"
+	IFS= read -r input
+	if [ -n "$input" ]; then
+		container="./$ANNEX_COMPUTE_passthrough"
+		mkdir -p "$(dirname "$container")"
+		ln "$(realpath "$input")" "$container"
+	else
+		echo "Unfortunately, addcomputed --fast cannot be used with git-annex-compute-singularity --passthrough=" >&2
+		exit 1
+	fi
+	# stdio is passed through to the git-annex-compute- command inside
+	# singularity. Only stderr is filtered, and since that is done
+	# with a process substitution rather than a pipeline, the exit
+	# status is that of singularity.
+	run_singularity "$@" 2> >( strip_escape 1>&2 )
+fi
--- /dev/null
+[[git-annex-compute-singularity]] uses [Singularity](https://sylabs.io/)
+to run a container, which is checked into the git-annex repository,
+to [[compute]] other files in the repository.
+
+This can be used in two different ways. One is to run an arbitrary command
+inside the singularity container. That is very flexible, but the syntax is
+slightly awkward since you have to provide the input and output filenames,
+as well as the command. The other way to use it is to have a singularity
+container that contains and runs another `git-annex-compute-` command.
+
+## running an arbitrary command
+
+An example of running an arbitrary command is:
+
+ git-annex initremote singularity type=compute program=git-annex-compute-singularity
+ singularity build debian.sif docker://debian
+ git-annex add debian.sif
+ git-annex addcomputed --to=singularity -- debian.sif foo bar -- baz -- sh -c 'cat foo bar > baz'
+
+Here the first filename passed to `git-annex addcomputed` must be the
+singularity container image to use. It is followed by the input files to
+make available inside the container, followed by "--" and then the output
+files. Finally, "--" separates the output files from the parameters
+to pass into the container.
+
+## passing through to a git-annex-compute- command inside a singularity container
+
+ git-annex initremote foo type=compute program=git-annex-compute-singularity passthrough=imageconvert.sif
+ git-annex addcomputed --to=foo foo.jpeg foo.gif
+
+This example uses a container `imageconvert.sif` that runs
+[[git-annex-compute-imageconvert]]. This allows using `git-annex addcomputed`
+with the same syntax that compute program usually uses.
+
+Note that the container file given to `passthrough=` is relative to the top
+of the git repository.
+
+To create that `imageconvert.sif` container:
+
+ cat > imageconvert.def <<EOF
+ Bootstrap: docker
+ From: debian
+
+ %post
+ apt-get -y update
+ apt-get -y install imagemagick wget
+ wget https://git-annex.branchable.com/special_remotes/compute/git-annex-compute-imageconvert -O /go
+ chmod +x /go
+
+ %runscript
+ /go "$@"
+ EOF
+ sudo singularity build imageconvert.sif imageconvert.def
+
+## singularity options
+
+`singularity run` is used to start the default command in the container.
+The command will find the input files in its current directory, and can
+write the output files to the same directory.
+
+Singularity is run with the `--oci` option, to get process namespacing
+and a generally secure sandboxed environment. Network access is also
+disabled in the container.
+
+A few singularity options can be provided, to control how the container is
+run. The goal is to only allow options that keep it secure. See singularity's
+documentation for details about these options.
+
+* `--no-compat`
+* `--fakeroot`
This is the remainder of my todo list while I was building the
compute special remote. --[[Joey]]
+* git-annex-compute-singularity with passthrough= cannot be used
+ by `git-annex addcomputed --fast` because the singularity image is not
+ available to run. Maybe make a variety of INPUT that is provided also
+ in --fast mode to solve this?
+
* write a tip showing how to use this
* Write some simple compute programs so we have something to start with.