-
Notifications
You must be signed in to change notification settings - Fork 45
Add support for running compute worker with other container engines #763
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
111f31f
fd3f38b
36e3327
63709e5
43e01d4
fd7d38f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| FROM fedora:37 | ||
|
|
||
| # Include deps: install Podman, fuse-overlayfs, Python 3.8 and pip in one layer. | ||
| # container-selinux is excluded from the install; dnf caches and logs are removed | ||
| # in the same RUN so they are not kept in the image layer. | ||
| RUN dnf -y update && \ | ||
| dnf -y install podman fuse-overlayfs python3.8 python3-pip \ | ||
| --exclude container-selinux && \ | ||
| dnf clean all && \ | ||
| rm -rf /var/cache /var/log/dnf* /var/log/yum.* | ||
|
|
||
| # Setup user: create the "worker" account and give it two subordinate UID/GID | ||
| # ranges (start 1, 999 IDs; start 1001, 64535 IDs) for user-namespace mapping. | ||
| # Steps are chained with && so a failing step aborts the build instead of being | ||
| # silently ignored; printf is used because its escape handling is well defined. | ||
| RUN useradd worker && \ | ||
| printf 'worker:1:999\nworker:1001:64535\n' > /etc/subuid && \ | ||
| printf 'worker:1:999\nworker:1001:64535\n' > /etc/subgid | ||
|
|
||
| # Copy over the podman container configuration: one file goes to the system-wide | ||
| # /etc/containers location, the other to the worker user's ~/.config/containers. | ||
| COPY podman/containers.conf /etc/containers/containers.conf | ||
| COPY podman/worker-containers.conf /home/worker/.config/containers/containers.conf | ||
|
|
||
| # Copy over the podman storage configuration into the worker user's ~/.config/containers. | ||
| COPY podman/worker-storage.conf /home/worker/.config/containers/storage.conf | ||
|
|
||
| # Create the worker's container storage directory, give the whole home directory | ||
| # (including the config files copied above) to the worker user, and make the | ||
| # system-wide containers.conf readable by everyone (mode 644). | ||
| RUN mkdir -p /home/worker/.local/share/containers && \ | ||
| chown worker:worker -R /home/worker && \ | ||
| chmod 644 /etc/containers/containers.conf | ||
|
|
||
| # Copy & modify the defaults to provide a reference if runtime changes are needed. | ||
| # Changes here are required for running with fuse-overlay storage inside a container. | ||
| # The sed script uncomments mount_program, appends the line "/var/lib/shared", after | ||
| # every line matching "additionalimage", and sets mountopt to "nodev,fsync=0". | ||
| # The edited copy of the packaged default is written to /etc/containers/storage.conf. | ||
| RUN sed -e 's|^#mount_program|mount_program|g' \ | ||
| -e '/additionalimage.*/a "/var/lib/shared",' \ | ||
| -e 's|^mountopt[[:space:]]*=.*$|mountopt = "nodev,fsync=0"|g' \ | ||
| /usr/share/containers/storage.conf \ | ||
| > /etc/containers/storage.conf | ||
|
|
||
| # Add volume for containers | ||
| VOLUME /home/worker/.local/share/containers | ||
|
|
||
| # Create directory for tmp space: /codabench, owned by the worker user. | ||
| RUN mkdir /codabench && \ | ||
| chown worker:worker /codabench | ||
|
|
||
| # Set up podman registry for dockerhub: unqualified image names are searched on docker.io. | ||
| # Written with the current (v2) registries.conf key; the v1 [registries.search] | ||
| # table is deprecated. printf is used for well-defined output. | ||
| RUN printf '%s\n' 'unqualified-search-registries = ["docker.io"]' > /etc/containers/registries.conf | ||
|
|
||
| # This makes output not buffer and return immediately, nice for seeing results in stdout | ||
| # (key=value form is used; the space-separated ENV form is discouraged). | ||
| ENV PYTHONUNBUFFERED=1 | ||
| # Name of the container engine executable; presumably read by the compute worker - verify. | ||
| ENV CONTAINER_ENGINE_EXECUTABLE=podman | ||
|
|
||
| # Get pip for 3.8: bootstrap (or upgrade) it with the interpreter's ensurepip module. | ||
| RUN python3.8 -m ensurepip --upgrade | ||
|
|
||
| WORKDIR /home/worker/compute_worker | ||
|
|
||
| # Copy the compute worker sources into the image. COPY is used instead of ADD because | ||
| # this is a plain local directory copy and none of ADD's extra behaviour is needed. | ||
| COPY compute_worker/ /home/worker/compute_worker | ||
|
|
||
| # Give the worker user ownership of the compute worker sources added to the image. | ||
| RUN chown worker:worker -R /home/worker/compute_worker | ||
|
|
||
| # Install the compute worker's Python requirements. --no-cache-dir keeps pip's | ||
| # download cache out of the image layer. | ||
| RUN pip3.8 install --no-cache-dir -r /home/worker/compute_worker/compute_worker_requirements.txt | ||
|
|
||
| # Start one Celery worker (concurrency 1) consuming the compute-worker queue. | ||
| # Exec (JSON) form is used so celery is started directly rather than through | ||
| # /bin/sh -c and therefore receives stop signals itself. | ||
| CMD ["celery", "-A", "compute_worker", "worker", \ | ||
| "-l", "info", \ | ||
| "-Q", "compute-worker", \ | ||
| "-n", "compute-worker@%n", \ | ||
| "--concurrency=1"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| FROM fedora:37 | ||
|
|
||
| # Include deps: register the NVIDIA CUDA and nvidia-docker repositories and the RPM Fusion | ||
| # free/nonfree release packages, then install the NVIDIA driver module, Podman, | ||
| # fuse-overlayfs, Python 3.8, pip, the NVIDIA container runtime/toolkit and CUDA. | ||
| # RUN steps already execute as root and sudo is not set up in the image, so the repo | ||
| # files are written directly with curl -o; -f makes an HTTP error fail the build. | ||
| RUN curl -fsSL -o /etc/yum.repos.d/cuda.repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo && \ | ||
| curl -fsSL -o /etc/yum.repos.d/nvidia-docker.repo https://nvidia.github.io/nvidia-docker/rhel9.0/nvidia-docker.repo && \ | ||
| rpm -Uvh http://download1.rpmfusion.org/free/fedora/rpmfusion-free-release-$(rpm -E %fedora).noarch.rpm && \ | ||
| rpm -Uvh http://download1.rpmfusion.org/nonfree/fedora/rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm && \ | ||
| dnf -y update && \ | ||
| dnf module install -y nvidia-driver:latest-dkms && \ | ||
| dnf -y install podman fuse-overlayfs python3.8 python3-pip nvidia-container-runtime nvidia-container-toolkit \ | ||
| cuda --exclude container-selinux && \ | ||
| dnf clean all && \ | ||
| rm -rf /var/cache /var/log/dnf* /var/log/yum.* | ||
|
|
||
| # Setup user: create the "worker" account and give it two subordinate UID/GID | ||
| # ranges (start 1, 999 IDs; start 1001, 64535 IDs) for user-namespace mapping. | ||
| # Steps are chained with && so a failing step aborts the build instead of being | ||
| # silently ignored; printf is used because its escape handling is well defined. | ||
| RUN useradd worker && \ | ||
| printf 'worker:1:999\nworker:1001:64535\n' > /etc/subuid && \ | ||
| printf 'worker:1:999\nworker:1001:64535\n' > /etc/subgid | ||
|
|
||
| # Copy over the podman container configuration: one file goes to the system-wide | ||
| # /etc/containers location, the other to the worker user's ~/.config/containers. | ||
| COPY podman/containers.conf /etc/containers/containers.conf | ||
| COPY podman/worker-containers.conf /home/worker/.config/containers/containers.conf | ||
|
|
||
| # Copy over the podman storage configuration into the worker user's ~/.config/containers. | ||
| COPY podman/worker-storage.conf /home/worker/.config/containers/storage.conf | ||
|
|
||
| # Create the worker's container storage directory, give the whole home directory | ||
| # (including the config files copied above) to the worker user, and make the | ||
| # system-wide containers.conf readable by everyone (mode 644). | ||
| RUN mkdir -p /home/worker/.local/share/containers && \ | ||
| chown worker:worker -R /home/worker && \ | ||
| chmod 644 /etc/containers/containers.conf | ||
|
|
||
| # Copy & modify the defaults to provide a reference if runtime changes are needed. | ||
| # Changes here are required for running with fuse-overlay storage inside a container. | ||
| # The first sed uncomments mount_program, appends the line "/var/lib/shared", after every | ||
| # line matching "additionalimage", and sets mountopt to "nodev,fsync=0"; its output | ||
| # becomes /etc/containers/storage.conf. The second sed switches the NVIDIA container | ||
| # runtime to no-cgroups = true. The two are joined with && so a failure of the first | ||
| # command fails the build instead of being masked by the second. | ||
| RUN sed -e 's|^#mount_program|mount_program|g' \ | ||
| -e '/additionalimage.*/a "/var/lib/shared",' \ | ||
| -e 's|^mountopt[[:space:]]*=.*$|mountopt = "nodev,fsync=0"|g' \ | ||
| /usr/share/containers/storage.conf \ | ||
| > /etc/containers/storage.conf && \ | ||
| sed -i 's/^#no-cgroups = false/no-cgroups = true/' /etc/nvidia-container-runtime/config.toml | ||
|
|
||
|
|
||
| # Add volume for containers | ||
| VOLUME /home/worker/.local/share/containers | ||
|
|
||
| # This makes output not buffer and return immediately, nice for seeing results in stdout | ||
| # (key=value form is used; the space-separated ENV form is discouraged). | ||
| ENV PYTHONUNBUFFERED=1 | ||
| # Name of the container engine executable; presumably read by the compute worker - verify. | ||
| ENV CONTAINER_ENGINE_EXECUTABLE=podman | ||
|
|
||
| # Create directory for tmp space (/codabench, owned by the worker user), configure the | ||
| # image registry search list and bootstrap pip, all in a single layer. | ||
| RUN mkdir /codabench && \ | ||
| chown worker:worker /codabench && \ | ||
| # Set up podman registry for dockerhub, using the current (v2) registries.conf key; | ||
| # the v1 [registries.search] table is deprecated. | ||
| printf '%s\n' 'unqualified-search-registries = ["docker.io"]' > /etc/containers/registries.conf && \ | ||
| # Get pip for 3.8 | ||
| python3.8 -m ensurepip --upgrade | ||
|
|
||
| WORKDIR /home/worker/compute_worker | ||
|
|
||
| # Copy the compute worker sources into the image. COPY is used instead of ADD because | ||
| # this is a plain local directory copy and none of ADD's extra behaviour is needed. | ||
| COPY compute_worker/ /home/worker/compute_worker | ||
|
|
||
| # Give the worker user ownership of the compute worker sources, then install the | ||
| # Python requirements. --no-cache-dir keeps pip's download cache out of the layer. | ||
| RUN chown worker:worker -R /home/worker/compute_worker && \ | ||
| pip3.8 install --no-cache-dir -r /home/worker/compute_worker/compute_worker_requirements.txt | ||
|
|
||
| # Run nvidia-smi first; the worker is only started if it succeeds. Then start one | ||
| # Celery worker (concurrency 1) on the compute-worker queue. exec replaces the | ||
| # wrapping shell with celery so the worker receives stop signals directly. | ||
| CMD nvidia-smi && exec celery -A compute_worker worker \ | ||
| -l info \ | ||
| -Q compute-worker \ | ||
| -n compute-worker@%n \ | ||
| --concurrency=1 | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We shouldn't duplicate this code. We should base the GPU version on the other compute worker image and make only the necessary changes.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I built the GPU version based on your Containerfile in order to validate the GPU case. You can remove it if it isn't necessary. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| # Podman defaults; presumably the podman/containers.conf installed system-wide - verify. | ||
| [containers] | ||
| # Network, user, IPC, UTS and cgroup namespaces are all set to "host". | ||
| netns="host" | ||
| userns="host" | ||
| ipcns="host" | ||
| utsns="host" | ||
| cgroupns="host" | ||
| # cgroups are disabled for containers. | ||
| cgroups="disabled" | ||
| # Container logs use the k8s-file log driver. | ||
| log_driver = "k8s-file" | ||
| [engine] | ||
| # Engine settings: cgroupfs cgroup manager, file-based events logger, crun as the OCI runtime. | ||
| cgroup_manager = "cgroupfs" | ||
| events_logger="file" | ||
| runtime="crun" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| # Container defaults; presumably the worker user's containers.conf - verify against the COPY destinations. | ||
| [containers] | ||
| # Mount /proc into containers at /proc. | ||
| volumes = [ | ||
| "/proc:/proc", | ||
| ] | ||
| # Empty list: no default sysctls are set. | ||
| default_sysctls = [] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| # Container storage settings: use the overlay storage driver. | ||
| [storage] | ||
| driver = "overlay" | ||
|
|
||
| # Overlay driver options: mount layers through the fuse-overlayfs helper binary. | ||
| [storage.options.overlay] | ||
| mount_program = "/usr/bin/fuse-overlayfs" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@dtuantran Not sure how this built for you:
`sudo` is not set up in the container. In any case, these steps run as root, so sudo is not needed. I will try to fix it up.