codalab · Didayolo · Feb 15, 2023 · Jan 25, 2023 · Jan 25, 2023 · Jan 30, 2023
diff --git a/Containerfile.compute_worker_podman b/Containerfile.compute_worker_podman
@@ -0,0 +1,63 @@
+FROM fedora:37
+
+# Worker tooling: podman with the fuse-overlayfs storage helper, Python 3.8
+# and python3-pip. container-selinux is excluded from the transaction.
+# dnf metadata, caches and dnf/yum logs are removed in this same layer.
+RUN dnf update -y \
+    && dnf install -y --exclude container-selinux \
+        fuse-overlayfs \
+        podman \
+        python3-pip \
+        python3.8 \
+    && dnf clean all \
+    && rm -rf /var/cache /var/log/dnf* /var/log/yum.*
+
+# Setup user
+# Creates the "worker" account and writes two subordinate ID entries for it
+# to /etc/subuid and /etc/subgid (presumably so podman can run containers as
+# worker -- confirm). The steps are chained with && so a failing step (e.g.
+# useradd) fails the build instead of being silently ignored, and printf is
+# used so the "\n" escape does not depend on the shell's echo implementation.
+RUN useradd worker && \
+    printf 'worker:1:999\nworker:1001:64535\n' > /etc/subuid && \
+    printf 'worker:1:999\nworker:1001:64535\n' > /etc/subgid
+
+# Podman configuration: the system-wide containers.conf, plus the worker
+# user's own containers.conf and storage.conf under ~/.config/containers.
+COPY podman/containers.conf /etc/containers/containers.conf
+COPY podman/worker-containers.conf /home/worker/.config/containers/containers.conf
+COPY podman/worker-storage.conf /home/worker/.config/containers/storage.conf
+
+# Make the system-wide config world-readable, pre-create the worker's
+# ~/.local/share/containers directory, then hand the whole home directory
+# (including the files copied above) to the worker user.
+RUN chmod 644 /etc/containers/containers.conf \
+    && mkdir -p /home/worker/.local/share/containers \
+    && chown -R worker:worker /home/worker
+
+# Copy & modify the defaults to provide reference if runtime changes needed.
+# Changes here are required for running with fuse-overlay storage inside container.
+# The sed expressions, in order:
+#   1. uncomment lines that start with "#mount_program"
+#   2. append the line "/var/lib/shared", after every line matching "additionalimage"
+#   3. replace any "mountopt = ..." line with mountopt = "nodev,fsync=0"
+# The result is written to /etc/containers/storage.conf; the packaged file in
+# /usr/share/containers is only read, not modified.
+RUN sed -e 's|^#mount_program|mount_program|g' \
+           -e '/additionalimage.*/a "/var/lib/shared",' \
+           -e 's|^mountopt[[:space:]]*=.*$|mountopt = "nodev,fsync=0"|g' \
+           /usr/share/containers/storage.conf \
+           > /etc/containers/storage.conf
+
+# The worker's ~/.local/share/containers directory is exposed as a volume
+VOLUME /home/worker/.local/share/containers
+
+# Scratch space for the worker, owned by the worker user
+RUN mkdir /codabench \
+    && chown worker:worker /codabench
+
+# Set up podman registry for dockerhub
+# Written in the version 2 registries.conf syntax: the [registries.search]
+# table used previously belongs to the deprecated version 1 format.
+# docker.io remains the only registry consulted for unqualified image names.
+RUN printf 'unqualified-search-registries = ["docker.io"]\n' > /etc/containers/registries.conf
+
+# This makes output not buffer and return immediately, nice for seeing results in stdout
+ENV PYTHONUNBUFFERED=1
+# Container engine binary that compute_worker.py invokes for pull/run/stop
+ENV CONTAINER_ENGINE_EXECUTABLE=podman
+
+# Get pip for 3.8
+RUN python3.8 -m ensurepip --upgrade
+
+WORKDIR /home/worker/compute_worker
+
+# Install the Python requirements before copying the rest of the sources, so
+# this layer stays cached until the requirements file itself changes.
+# --no-cache-dir keeps pip's download cache out of the image layer.
+COPY compute_worker/compute_worker_requirements.txt /home/worker/compute_worker/compute_worker_requirements.txt
+RUN pip3.8 install --no-cache-dir -r /home/worker/compute_worker/compute_worker_requirements.txt
+
+# COPY instead of ADD: only a plain copy of local files is needed here
+COPY compute_worker/ /home/worker/compute_worker
+
+# Hand the working directory and everything copied into it to the worker user
+RUN chown worker:worker -R /home/worker/compute_worker
+
+# Exec (JSON) form: celery is started directly instead of through /bin/sh -c,
+# so it is the container's main process and receives stop signals itself.
+# No shell expansion is needed; "%n" is passed to celery verbatim.
+CMD ["celery", "-A", "compute_worker", "worker", "-l", "info", "-Q", "compute-worker", "-n", "compute-worker@%n", "--concurrency=1"]
diff --git a/Containerfile.compute_worker_podman_gpu b/Containerfile.compute_worker_podman_gpu
@@ -0,0 +1,66 @@
+FROM fedora:37
+
+# Include deps
+# No USER is set before this point, so the repo files are written directly
+# with "curl -o" rather than piped through "sudo tee". -f makes curl exit
+# non-zero on an HTTP error, so a failed download aborts the build instead of
+# leaving an error page behind as a .repo file. The RPM Fusion release
+# packages are fetched over https instead of plain http.
+RUN curl -fsSL -o /etc/yum.repos.d/cuda.repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo && \
+    curl -fsSL -o /etc/yum.repos.d/nvidia-docker.repo https://nvidia.github.io/nvidia-docker/rhel9.0/nvidia-docker.repo && \
+    rpm -Uvh https://download1.rpmfusion.org/free/fedora/rpmfusion-free-release-$(rpm -E %fedora).noarch.rpm && \
+    rpm -Uvh https://download1.rpmfusion.org/nonfree/fedora/rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm && \
+    dnf -y update && \
+    dnf module install -y nvidia-driver:latest-dkms && \
+    dnf -y install podman fuse-overlayfs python3.8 python3-pip nvidia-container-runtime nvidia-container-toolkit \
+     cuda --exclude container-selinux && \
+    dnf clean all && \
+    rm -rf /var/cache /var/log/dnf* /var/log/yum.*
+
+# Setup user
+# Creates the "worker" account and writes two subordinate ID entries for it
+# to /etc/subuid and /etc/subgid (presumably so podman can run containers as
+# worker -- confirm). The steps are chained with && so a failing step (e.g.
+# useradd) fails the build instead of being silently ignored, and printf is
+# used so the "\n" escape does not depend on the shell's echo implementation.
+RUN useradd worker && \
+    printf 'worker:1:999\nworker:1001:64535\n' > /etc/subuid && \
+    printf 'worker:1:999\nworker:1001:64535\n' > /etc/subgid
+
+# Podman configuration: the system-wide containers.conf, plus the worker
+# user's own containers.conf and storage.conf under ~/.config/containers.
+COPY podman/containers.conf /etc/containers/containers.conf
+COPY podman/worker-containers.conf /home/worker/.config/containers/containers.conf
+COPY podman/worker-storage.conf /home/worker/.config/containers/storage.conf
+
+# Make the system-wide config world-readable, pre-create the worker's
+# ~/.local/share/containers directory, then hand the whole home directory
+# (including the files copied above) to the worker user.
+RUN chmod 644 /etc/containers/containers.conf \
+    && mkdir -p /home/worker/.local/share/containers \
+    && chown -R worker:worker /home/worker
+
+# Copy & modify the defaults to provide reference if runtime changes needed.
+# Changes here are required for running with fuse-overlay storage inside container.
+# The first sed uncomments "#mount_program" lines, appends "/var/lib/shared",
+# after every line matching "additionalimage", and sets mountopt to
+# "nodev,fsync=0", writing the result to /etc/containers/storage.conf.
+# The second sed turns the commented-out "no-cgroups = false" default in the
+# nvidia-container-runtime config into "no-cgroups = true".
+# The two are joined with && so a failure of the first is not masked.
+RUN sed -e 's|^#mount_program|mount_program|g' \
+           -e '/additionalimage.*/a "/var/lib/shared",' \
+           -e 's|^mountopt[[:space:]]*=.*$|mountopt = "nodev,fsync=0"|g' \
+           /usr/share/containers/storage.conf \
+           > /etc/containers/storage.conf && \
+    sed -i 's/^#no-cgroups = false/no-cgroups = true/;' /etc/nvidia-container-runtime/config.toml
+
+
+# Add volume for containers
+VOLUME /home/worker/.local/share/containers
+
+# This makes output not buffer and return immediately, nice for seeing results in stdout
+ENV PYTHONUNBUFFERED=1
+# Container engine binary that compute_worker.py invokes for pull/run/stop
+ENV CONTAINER_ENGINE_EXECUTABLE=podman
+
+# Create directory for tmp space
+RUN mkdir /codabench && \
+    chown worker:worker /codabench && \
+# Set up podman registry for dockerhub. Written in the version 2
+# registries.conf syntax; the [registries.search] table used previously
+# belongs to the deprecated version 1 format.
+    printf 'unqualified-search-registries = ["docker.io"]\n' > /etc/containers/registries.conf && \
+# Get pip for 3.8
+    python3.8 -m ensurepip --upgrade
+
+WORKDIR /home/worker/compute_worker
+
+# Install the Python requirements before copying the rest of the sources, so
+# this layer stays cached until the requirements file itself changes.
+# --no-cache-dir keeps pip's download cache out of the image layer.
+COPY compute_worker/compute_worker_requirements.txt /home/worker/compute_worker/compute_worker_requirements.txt
+RUN pip3.8 install --no-cache-dir -r /home/worker/compute_worker/compute_worker_requirements.txt
+
+# COPY instead of ADD: only a plain copy of local files is needed here
+COPY compute_worker/ /home/worker/compute_worker
+
+# Hand the working directory and everything copied into it to the worker user
+RUN chown worker:worker -R /home/worker/compute_worker
+
+# nvidia-smi runs first; the worker is only started if it exits successfully.
+# Shell form is kept because of the "&&", but "exec" makes celery replace the
+# shell so the worker itself receives the container's stop signals.
+CMD nvidia-smi && exec celery -A compute_worker worker \
+    -l info \
+    -Q compute-worker \
+    -n compute-worker@%n \
+    --concurrency=1
diff --git a/Dockerfile.compute_worker b/Dockerfile.compute_worker
@@ -6,10 +6,10 @@ ENV PYTHONUNBUFFERED 1
 # Install Docker
 RUN apt-get update && curl -fsSL https://get.docker.com | sh
 
-ADD docker/compute_worker/compute_worker_requirements.txt .
+ADD compute_worker/compute_worker_requirements.txt .
 RUN pip install -r compute_worker_requirements.txt
 
-ADD docker/compute_worker .
+ADD compute_worker .
 
 CMD celery -A compute_worker worker \
     -l info \

diff --git a/Dockerfile.compute_worker_gpu b/Dockerfile.compute_worker_gpu
@@ -19,9 +19,9 @@ RUN apt-get update && apt-get install -y nvidia-docker2
 ENV NVIDIA_DOCKER 1
 
 # Python reqs and actual worker stuff
-ADD docker/compute_worker/compute_worker_requirements.txt .
+ADD compute_worker/compute_worker_requirements.txt .
 RUN pip3 install -r compute_worker_requirements.txt
-ADD docker/compute_worker .
+ADD compute_worker .
 
 CMD celery -A compute_worker worker \
     -l info \

diff --git a/docker/compute_worker/celery_config.py → compute_worker/celery_config.py b/docker/compute_worker/celery_config.py → compute_worker/celery_config.py
diff --git a/docker/compute_worker/compute_worker.py → compute_worker/compute_worker.py b/docker/compute_worker/compute_worker.py → compute_worker/compute_worker.py
@@ -65,6 +65,14 @@
     STATUS_FAILED,
 )
 
+# Setup the container engine that we are using
+if os.environ.get("CONTAINER_ENGINE_EXECUTABLE"):
+    CONTAINER_ENGINE_EXECUTABLE = os.environ.get("CONTAINER_ENGINE_EXECUTABLE")
+# We could probably depreciate this now that we can specify the executable
+elif os.environ.get("NVIDIA_DOCKER"):
+    CONTAINER_ENGINE_EXECUTABLE = "nvidia-docker"
+else:
+    CONTAINER_ENGINE_EXECUTABLE = "docker"
 
 class SubmissionException(Exception):
     pass
@@ -181,7 +189,7 @@ def __init__(self, run_args):
         self.user_pk = run_args["user_pk"]
         self.submission_id = run_args["id"]
         self.submissions_api_url = run_args["submissions_api_url"]
-        self.docker_image = run_args["docker_image"]
+        self.container_image = run_args["docker_image"]
         self.secret = run_args["secret"]
         self.prediction_result = run_args["prediction_result"]
         self.scoring_result = run_args.get("scoring_result")
@@ -221,7 +229,7 @@ def __init__(self, run_args):
         self.requests_session.mount('https://', adapter)
 
     async def watch_detailed_results(self):
-        """Watches files alongside scoring + program docker containers, currently only used
+        """Watches files alongside scoring + program containers, currently only used
         for detailed_results.html"""
         if not self.detailed_results_url:
             return
@@ -314,15 +322,15 @@ def _update_status(self, status, extra_information=None):
         #     })
         self._update_submission(data)
 
-    def _get_docker_image(self, image_name):
-        logger.info("Running docker pull for image: {}".format(image_name))
+    def _get_container_image(self, image_name):
+        logger.info("Running pull for image: {}".format(image_name))
         try:
-            cmd = ['docker', 'pull', image_name]
-            docker_pull = check_output(cmd)
-            logger.info("Docker pull complete for image: {0} with output of {1}".format(image_name, docker_pull))
+            cmd = [CONTAINER_ENGINE_EXECUTABLE, 'pull', image_name]
+            container_engine_pull = check_output(cmd)
+            logger.info("Pull complete for image: {0} with output of {1}".format(image_name, container_engine_pull))
         except CalledProcessError:
-            logger.info("Docker pull for image: {} returned a non-zero exit code!")
-            raise SubmissionException(f"Docker pull for {image_name} failed!")
+            logger.info("Pull for image: {} returned a non-zero exit code!")
+            raise SubmissionException(f"Pull for {image_name} failed!")
 
     def _get_bundle(self, url, destination, cache=True):
         """Downloads zip from url and unzips into destination. If cache=True then url is hashed and checked
@@ -357,17 +365,17 @@ def _get_bundle(self, url, destination, cache=True):
         # Give back zip file path for other uses, i.e. md5'ing the zip to ID it
         return bundle_file
 
-    async def _run_docker_cmd(self, docker_cmd, kind):
+    async def _run_container_engine_cmd(self, engine_cmd, kind):
         """This runs a command and asynchronously writes the data to both a storage file
         and a socket
 
-        :param docker_cmd: the list of docker command arguments
+        :param engine_cmd: the list of container engine command arguments
         :param kind: either 'ingestion' or 'program'
         :return:
         """
         start = time.time()
         proc = await asyncio.create_subprocess_exec(
-            *docker_cmd,
+            *engine_cmd,
             stdout=asyncio.subprocess.PIPE,
             stderr=asyncio.subprocess.PIPE
         )
@@ -442,17 +450,23 @@ async def _run_docker_cmd(self, docker_cmd, kind):
         await websocket.close()
 
     def _get_host_path(self, *paths):
-        """Turns an absolute path inside our docker container, into what the path
-        would be on the host machine"""
+        """Turns an absolute path inside our container, into what the path
+        would be on the host machine. We also ensure that the directory exists,
+        docker will create if necessary, but other container engines such as
+        podman may not."""
         # Take our list of paths and smash 'em together
         path = os.path.join(*paths)
 
-        # pull front of path, which points to the location inside docker
+        # pull front of path, which points to the location inside the container
         path = path[len(BASE_DIR):]
 
-        # add host to front, so when we run commands in docker on the host they
+        # add host to front, so when we run commands in the container on the host they
         # can be seen properly
         path = os.path.join(HOST_DIRECTORY, path)
+
+        # Create if necessary
+        # NOTE(review): makedirs runs in the worker's own filesystem view; the
+        # directory only appears on the host if HOST_DIRECTORY is mounted at the
+        # same path inside the worker container -- confirm for each deployment.
+        os.makedirs(path, exist_ok=True)
+
         return path
 
     async def _run_program_directory(self, program_dir, kind, can_be_output=False):
@@ -494,13 +508,8 @@ async def _run_program_directory(self, program_dir, kind, can_be_output=False):
                 )
                 return
 
-        if os.environ.get("NVIDIA_DOCKER"):
-            docker_process_name = "nvidia-docker"
-        else:
-            docker_process_name = "docker"
-
-        docker_cmd = [
-            docker_process_name,
+        engine_cmd = [
+            CONTAINER_ENGINE_EXECUTABLE,
             'run',
             # Remove it after run
             '--rm',
@@ -528,21 +537,21 @@ async def _run_program_directory(self, program_dir, kind, can_be_output=False):
             else:
                 ingested_program_location = "program"
 
-            docker_cmd += ['-v', f'{self._get_host_path(self.root_dir, ingested_program_location)}:/app/ingested_program']
+            engine_cmd += ['-v', f'{self._get_host_path(self.root_dir, ingested_program_location)}:/app/ingested_program']
 
         if self.input_data:
-            docker_cmd += ['-v', f'{self._get_host_path(self.root_dir, "input_data")}:/app/input_data']
+            engine_cmd += ['-v', f'{self._get_host_path(self.root_dir, "input_data")}:/app/input_data']
 
         if self.is_scoring:
             # For scoring programs, we want to have a shared directory just in case we have an ingestion program.
             # This will add the share dir regardless of ingestion or scoring, as long as we're `is_scoring`
-            docker_cmd += ['-v', f'{self._get_host_path(self.root_dir, "shared")}:/app/shared']
+            engine_cmd += ['-v', f'{self._get_host_path(self.root_dir, "shared")}:/app/shared']
 
             # Input from submission (or submission + ingestion combo)
-            docker_cmd += ['-v', f'{self._get_host_path(self.input_dir)}:/app/input']
+            engine_cmd += ['-v', f'{self._get_host_path(self.input_dir)}:/app/input']
 
         # Set the image name (i.e. "codalab/codalab-legacy") for the container
-        docker_cmd += [self.docker_image]
+        engine_cmd += [self.container_image]
 
         # Handle Legacy competitions by replacing anything in the run command
         command = replace_legacy_metadata_command(
@@ -553,12 +562,12 @@ async def _run_program_directory(self, program_dir, kind, can_be_output=False):
         )
 
         # Append the actual program to run
-        docker_cmd += command.split(' ')
+        engine_cmd += command.split(' ')
 
-        logger.info(f"Running program = {' '.join(docker_cmd)}")
+        logger.info(f"Running program = {' '.join(engine_cmd)}")
 
-        # This runs the docker command and asynchronously passes data back via websocket
-        return await self._run_docker_cmd(docker_cmd, kind=kind)
+        # This runs the container engine command and asynchronously passes data back via websocket
+        return await self._run_container_engine_cmd(engine_cmd, kind=kind)
 
     def _put_dir(self, url, directory):
         logger.info("Putting dir %s in %s" % (directory, url))
@@ -649,9 +658,9 @@ def prepare(self):
         for filename in glob.iglob(self.root_dir + '**/*.*', recursive=True):
             logger.info(filename)
 
-        # Before the run starts we want to download docker images, they may take a while to download
+        # Before the run starts we want to download images, they may take a while to download
         # and to do this during the run would subtract from the participants time.
-        self._get_docker_image(self.docker_image)
+        self._get_container_image(self.container_image)
 
     def start(self):
         if not self.is_scoring:
@@ -690,7 +699,7 @@ def start(self):
                     else:
                         program_to_kill = self.program_container_name
                     # Try and stop the program. If stop does not succeed
-                    kill_code = subprocess.call(['docker', 'stop', str(program_to_kill)])
+                    kill_code = subprocess.call([CONTAINER_ENGINE_EXECUTABLE, 'stop', str(program_to_kill)])
                     logger.info(f'Kill process returned {kill_code}')
                 if kind == 'program':
                     self.program_exit_code = return_code

diff --git a/...te_worker/compute_worker_requirements.txt → ...te_worker/compute_worker_requirements.txt b/...te_worker/compute_worker_requirements.txt → ...te_worker/compute_worker_requirements.txt
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -201,7 +201,7 @@ services:
       - django
       - rabbit
     volumes:
-      - ./docker/compute_worker:/app
+      - ./compute_worker:/app
       - ${HOST_DIRECTORY:-/tmp/codabench}:/codabench
       # Actual connection back to docker parent to run things
       - /var/run/docker.sock:/var/run/docker.sock

diff --git a/podman/containers.conf b/podman/containers.conf
@@ -0,0 +1,12 @@
+# Podman defaults installed as /etc/containers/containers.conf in the worker image
+[containers]
+# Every namespace setting below is set to "host"
+netns = "host"
+userns = "host"
+ipcns = "host"
+utsns = "host"
+cgroupns = "host"
+# cgroup handling for containers is turned off
+cgroups = "disabled"
+log_driver = "k8s-file"
+
+[engine]
+cgroup_manager = "cgroupfs"
+events_logger = "file"
+runtime = "crun"
diff --git a/podman/worker-containers.conf b/podman/worker-containers.conf
@@ -0,0 +1,5 @@
+# Podman defaults installed as the worker user's ~/.config/containers/containers.conf
+[containers]
+# Default volume list: /proc is mounted at /proc
+volumes = ["/proc:/proc"]
+# Empty list: no default sysctls are configured
+default_sysctls = []
diff --git a/podman/worker-storage.conf b/podman/worker-storage.conf
@@ -0,0 +1,5 @@
+# Storage settings installed as the worker user's ~/.config/containers/storage.conf
+[storage]
+# Use the overlay storage driver
+driver = "overlay"
+
+[storage.options.overlay]
+# Overlay mounts go through the fuse-overlayfs helper binary
+mount_program = "/usr/bin/fuse-overlayfs"