diff --git a/README.md b/README.md index 63075a4ef..cd8762753 100644 --- a/README.md +++ b/README.md @@ -40,32 +40,95 @@ conda remove --name softlearning --all ## Docker Installation ### docker-compose -To build the image and run the container: -``` -export MJKEY="$(cat ~/.mujoco/mjkey.txt)" \ - && docker-compose \ - -f ./docker/docker-compose.dev.cpu.yml \ - up \ - -d \ - --force-recreate -``` + +To build the image: + +- CPU: + + ```sh + DOCKER_BUILDKIT=1 \ + docker build \ + -f ./docker/Dockerfile.softlearning.base.cpu \ + -t softlearning:latest-cpu \ + --progress=plain \ + --secret id=mjkey,src="${HOME}/.mujoco/mjkey.txt" . + ``` + +- GPU: + + ```sh + DOCKER_BUILDKIT=1 \ + docker build \ + -f ./docker/Dockerfile.softlearning.base.gpu \ + -t softlearning:latest-gpu \ + --progress=plain \ + --secret id=mjkey,src="${HOME}/.mujoco/mjkey.txt" . + ``` + +and run the container: + +- CPU: + + ```sh + docker-compose \ + -p ${USER} \ + -f ./docker/docker-compose.dev.cpu.yml \ + up \ + -d \ + --force-recreate + ``` + +- GPU: + + ```sh + docker-compose \ + -p ${USER} \ + -f ./docker/docker-compose.dev.gpu.yml \ + up \ + -d \ + --force-recreate + ``` You can access the container with the typical Docker [exec](https://docs.docker.com/engine/reference/commandline/exec/)-command, i.e. -``` -docker exec -it softlearning bash -``` +- CPU: + + ```sh + docker exec -it softlearning-dev-cpu bash + pip install -e . + ``` + +- GPU: + + ```sh + docker exec -it softlearning-dev-gpu bash + pip install -e . + # Make sure to add the `--trial-gpus` flag for the `softlearning` command. + ``` See examples section for examples of how to train and simulate the agents. 
Finally, to clean up the docker setup: -``` -docker-compose \ - -f ./docker/docker-compose.dev.cpu.yml \ - down \ - --rmi all \ - --volumes -``` + +- CPU: + + ```sh + docker-compose -p "${USER}" \ + -f ./docker/docker-compose.dev.cpu.yml \ + down \ + --rmi all \ + --volumes + ``` + +- GPU: + + ```sh + docker-compose -p "${USER}" \ + -f ./docker/docker-compose.dev.gpu.yml \ + down \ + --rmi all \ + --volumes + ``` ## Examples ### Training and simulating an agent diff --git a/docker/Dockerfile.softlearning.base.cpu b/docker/Dockerfile.softlearning.base.cpu index 5f3c9fc94..7ffe8023a 100644 --- a/docker/Dockerfile.softlearning.base.cpu +++ b/docker/Dockerfile.softlearning.base.cpu @@ -51,8 +51,8 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86 echo ". /opt/conda/etc/profile.d/conda.sh" >> /etc/bash.bashrc RUN apt-get install -y curl grep sed dpkg && \ - TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \ - curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \ + TINI_VERSION=`curl https://api.github.com/repos/krallin/tini/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'` && \ + curl -L "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini_${TINI_VERSION:1}.deb" > tini.deb && \ dpkg -i tini.deb && \ rm tini.deb && \ apt-get clean \ @@ -133,10 +133,15 @@ COPY ./requirements.txt /tmp/ RUN --mount=type=secret,id=mjkey,dst=/root/.mujoco/mjkey.txt \ conda env update -f /tmp/environment.yml \ && conda clean --all -y +RUN source activate softlearning \ + && pip install setuptools==65.5.0 wheel==0.38.0 \ + && pip install gym==0.18.0 RUN echo "conda activate softlearning" >> ~/.bashrc \ && echo "cd ~/softlearning" >> ~/.bashrc +RUN git config --global --add safe.directory /root/softlearning + # =========== Container Entrypoint ============= COPY ./docker/entrypoint.sh /entrypoint.sh diff --git 
a/docker/Dockerfile.softlearning.base.gpu b/docker/Dockerfile.softlearning.base.gpu index 226345c4b..7882310af 100644 --- a/docker/Dockerfile.softlearning.base.gpu +++ b/docker/Dockerfile.softlearning.base.gpu @@ -25,18 +25,11 @@ # --force-recreate -ARG UBUNTU_VERSION=18.04 -ARG ARCH= -ARG CUDA=10.0 +# ARG UBUNTU_VERSION=20.04 -FROM nvidia/cudagl${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base -# ARCH and CUDA are specified again because the FROM directive resets ARGs -# (but their default value is retained if set previously) +FROM nvcr.io/nvidia/tensorflow:21.05-tf2-py3 -ARG UBUNTU_VERSION -ARG ARCH -ARG CUDA -ARG CUDNN=7.4.1.5-1 +# ARG UBUNTU_VERSION SHELL ["/bin/bash", "-c"] @@ -58,8 +51,8 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86 echo ". /opt/conda/etc/profile.d/conda.sh" >> /etc/bash.bashrc RUN apt-get install -y curl grep sed dpkg && \ - TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \ - curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \ + TINI_VERSION=`curl https://api.github.com/repos/krallin/tini/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'` && \ + curl -L "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini_${TINI_VERSION:1}.deb" > tini.deb && \ dpkg -i tini.deb && \ rm tini.deb && \ apt-get clean \ @@ -70,38 +63,6 @@ RUN conda update -y --name base conda \ && conda clean --all -y -# ========== Tensorflow dependencies ========== -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - build-essential \ - cuda-command-line-tools-${CUDA/./-} \ - cuda-cublas-${CUDA/./-} \ - cuda-cufft-${CUDA/./-} \ - cuda-curand-${CUDA/./-} \ - cuda-cusolver-${CUDA/./-} \ - cuda-cusparse-${CUDA/./-} \ - curl \ - libcudnn7=${CUDNN}+cuda${CUDA} \ - libfreetype6-dev \ - libhdf5-serial-dev \ - libzmq3-dev \ - pkg-config \ - 
software-properties-common \ - zip \ - unzip \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ - apt-get install nvinfer-runtime-trt-repo-ubuntu${UBUNTU_VERSION/./}-5.0.2-ga-cuda${CUDA} \ - && apt-get update \ - && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/*) - -# For CUDA profiling, TensorFlow requires CUPTI. -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH - # ========== Softlearning dependencies ========== RUN apt-get update -y \ && apt-get install -y --no-install-recommends \ @@ -135,13 +96,6 @@ RUN apt-get update -y \ xpra \ xserver-xorg-dev \ xvfb \ - && export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" \ - && echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" \ - | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \ - && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \ - | apt-key add - \ - && apt-get update -y \ - && apt-get install -y google-cloud-sdk \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* @@ -163,8 +117,8 @@ ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/usr/lib/nvidia-000 # ========== Conda Environment ========== -COPY ./environment.yml /tmp/environment.yml -COPY ./requirements.txt /tmp/requirements.txt +COPY ./environment.yml /tmp/ +COPY ./requirements.txt /tmp/ # NOTE: Fetch `mjkey.txt` from secret mount to avoid writing it to the build # history. 
For details, see: @@ -172,10 +126,18 @@ COPY ./requirements.txt /tmp/requirements.txt RUN --mount=type=secret,id=mjkey,dst=/root/.mujoco/mjkey.txt \ conda env update -f /tmp/environment.yml \ && conda clean --all -y +RUN source activate softlearning \ + && pip install setuptools==65.5.0 wheel==0.38.0 \ + && pip install gym==0.18.0 RUN echo "conda activate softlearning" >> ~/.bashrc \ && echo "cd ~/softlearning" >> ~/.bashrc +RUN git config --global --add safe.directory /root/softlearning + +RUN ln /usr/local/cuda-11.3/targets/x86_64-linux/lib/libcusolver.so.11 /usr/local/cuda-11.3/targets/x86_64-linux/lib/libcusolver.so.10 +ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda-11.3/targets/x86_64-linux/lib/ + # =========== Container Entrypoint ============= COPY ./docker/entrypoint.sh /entrypoint.sh diff --git a/docker/docker-compose.dev.gpu.yml b/docker/docker-compose.dev.gpu.yml index b77ea1f0b..6b51311cd 100644 --- a/docker/docker-compose.dev.gpu.yml +++ b/docker/docker-compose.dev.gpu.yml @@ -29,3 +29,4 @@ services: - bash stdin_open: true tty: true + shm_size: '60gb' diff --git a/requirements.txt b/requirements.txt index 035cd8865..35934f82a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,8 +17,6 @@ click==7.1.2 cloudpickle==1.6.0 colorama==0.4.4 colorful==0.5.4 -conda==4.9.2 -conda-package-handling==1.7.2 cryptography==3.3.2 cycler==0.10.0 Cython==0.29.21 @@ -42,7 +40,6 @@ googleapis-common-protos==1.52.0 gpustat==0.6.0 grpcio==1.32.0 gtimer==1.0.0b5 -gym==0.18.0 h5py==2.10.0 hiredis==1.1.0 idna==2.10 @@ -61,7 +58,7 @@ lxml==4.6.2 Markdown==3.3.3 matplotlib==3.3.3 msgpack==1.0.2 -mujoco-py==2.0.2.13 +mujoco-py==2.0.2.10 multidict==5.1.0 networkx==2.5 numpy==1.19.5