From bf7e5cdc5f0b22b4832c3f402a504ca0a6b1c04d Mon Sep 17 00:00:00 2001 From: sapkota-aayush Date: Sun, 29 Jun 2025 20:03:01 -0400 Subject: [PATCH 1/9] optimize: reduce Docker image size by 60% using proper multi-stage builds Signed-off-by: sapkota-aayush --- examples/batchmap/flatmap/Dockerfile | 36 ++++++++-- examples/map/even_odd/Dockerfile | 70 ++++++++++++------- examples/map/even_odd/example.py | 1 + examples/map/flatmap/Dockerfile | 36 ++++++++-- examples/map/forward_message/Dockerfile | 36 ++++++++-- examples/map/multiproc_map/Dockerfile | 36 ++++++++-- examples/mapstream/flatmap_stream/Dockerfile | 36 ++++++++-- examples/reduce/asyncio_reduce/Dockerfile | 32 +++++++-- examples/reduce/counter/Dockerfile | 36 ++++++++-- examples/reducestream/counter/Dockerfile | 36 ++++++++-- examples/reducestream/sum/Dockerfile | 36 ++++++++-- .../sideinput/simple_sideinput/Dockerfile | 36 ++++++++-- .../sideinput/simple_sideinput/udf/Dockerfile | 36 ++++++++-- examples/sink/async_log/Dockerfile | 36 ++++++++-- examples/sink/log/Dockerfile | 36 ++++++++-- examples/source/simple_source/Dockerfile | 36 ++++++++-- .../async_event_time_filter/Dockerfile | 36 ++++++++-- .../event_time_filter/Dockerfile | 36 ++++++++-- pyproject.toml | 1 + 19 files changed, 539 insertions(+), 105 deletions(-) diff --git a/examples/batchmap/flatmap/Dockerfile b/examples/batchmap/flatmap/Dockerfile index 20f1a820..f9bb239c 100644 --- a/examples/batchmap/flatmap/Dockerfile +++ b/examples/batchmap/flatmap/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/batchmap/flatmap" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/batchmap/flatmap/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/map/even_odd/Dockerfile b/examples/map/even_odd/Dockerfile index a2da2f81..4ad0ab20 100644 --- a/examples/map/even_odd/Dockerfile +++ b/examples/map/even_odd/Dockerfile @@ -1,52 +1,72 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Builder - installs dependencies using poetry #################################################################################################### - FROM python:3.10-slim-bullseye AS builder ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PIP_NO_CACHE_DIR=on \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + POETRY_VERSION=1.2.2 \ + POETRY_HOME="/opt/poetry" \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_NO_INTERACTION=1 \ + PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/even_odd" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ +# Install build dependencies and poetry +RUN apt-get update && apt-get install --no-install-recommends -y \ curl \ wget \ - # deps for building python deps build-essential \ - && apt-get install -y git \ + git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - +# Copy project into builder +WORKDIR $PYSETUP_PATH +COPY ./ ./ + +# Install deps +WORKDIR $EXAMPLE_PATH +RUN poetry lock && \ + poetry install --no-cache --no-root && \ + rm -rf ~/.cache/pypoetry/ + #################################################################################################### -# udf: used for running the udf vertices +# Stage 2: UDF Runtime - clean container with only needed stuff #################################################################################################### -FROM builder AS udf +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/even_odd" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy virtual environment and entry script WORKDIR $PYSETUP_PATH -COPY ./ ./ +COPY --from=builder $VENV_PATH $VENV_PATH +COPY --from=builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ - RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/map/even_odd/example.py b/examples/map/even_odd/example.py index 52405590..92a31708 100644 --- a/examples/map/even_odd/example.py +++ b/examples/map/even_odd/example.py @@ -21,6 +21,7 @@ def my_handler(keys: list[str], datum: Datum) -> Messages: return messages + if __name__ == "__main__": """ This example shows how to create a simple map function that takes in a diff --git a/examples/map/flatmap/Dockerfile b/examples/map/flatmap/Dockerfile index d2ce662f..56ccc0db 100644 --- a/examples/map/flatmap/Dockerfile +++ b/examples/map/flatmap/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/flatmap" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/map/flatmap/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/map/forward_message/Dockerfile b/examples/map/forward_message/Dockerfile index 84b4bdff..442e4d05 100644 --- a/examples/map/forward_message/Dockerfile +++ b/examples/map/forward_message/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/forward_message" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/map/forward_message/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/map/multiproc_map/Dockerfile b/examples/map/multiproc_map/Dockerfile index 0928c03a..7b878294 100644 --- a/examples/map/multiproc_map/Dockerfile +++ b/examples/map/multiproc_map/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/multiproc_map" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/map/multiproc_map/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/mapstream/flatmap_stream/Dockerfile b/examples/mapstream/flatmap_stream/Dockerfile index a7397526..25e52a05 100644 --- a/examples/mapstream/flatmap_stream/Dockerfile +++ b/examples/mapstream/flatmap_stream/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/mapstream/flatmap_stream" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/mapstream/flatmap_stream/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/reduce/asyncio_reduce/Dockerfile b/examples/reduce/asyncio_reduce/Dockerfile index 32cb8500..72da7ea4 100644 --- a/examples/reduce/asyncio_reduce/Dockerfile +++ b/examples/reduce/asyncio_reduce/Dockerfile @@ -33,16 +33,38 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY pyproject.toml ./ RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" \ + VENV_PATH="/opt/pysetup/.venv" + +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH ADD . /app WORKDIR /app diff --git a/examples/reduce/counter/Dockerfile b/examples/reduce/counter/Dockerfile index a617b3fa..a99be083 100644 --- a/examples/reduce/counter/Dockerfile +++ b/examples/reduce/counter/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reduce/counter" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/reduce/counter/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/reducestream/counter/Dockerfile b/examples/reducestream/counter/Dockerfile index de1756fd..c67cebff 100644 --- a/examples/reducestream/counter/Dockerfile +++ b/examples/reducestream/counter/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reducestream/counter" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/reducestream/counter/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/reducestream/sum/Dockerfile b/examples/reducestream/sum/Dockerfile index 1f715387..2823efb9 100644 --- a/examples/reducestream/sum/Dockerfile +++ b/examples/reducestream/sum/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reducestream/sum" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/reducestream/sum/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/sideinput/simple_sideinput/Dockerfile b/examples/sideinput/simple_sideinput/Dockerfile index ab3e3355..3cc98025 100644 --- a/examples/sideinput/simple_sideinput/Dockerfile +++ b/examples/sideinput/simple_sideinput/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sideinput/simple_sideinput" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/sideinput/simple_sideinput/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/sideinput/simple_sideinput/udf/Dockerfile b/examples/sideinput/simple_sideinput/udf/Dockerfile index 3cbd912a..5bc7cbaf 100644 --- a/examples/sideinput/simple_sideinput/udf/Dockerfile +++ b/examples/sideinput/simple_sideinput/udf/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sideinput/simple_sideinput/udf" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/sideinput/simple_sideinput/udf/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/sink/async_log/Dockerfile b/examples/sink/async_log/Dockerfile index 3739ba70..a8a53f9a 100644 --- a/examples/sink/async_log/Dockerfile +++ b/examples/sink/async_log/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udsink: used for running the udsink vertices -#################################################################################################### -FROM builder AS udsink - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sink/async_log" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/sink/async_log/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/sink/log/Dockerfile b/examples/sink/log/Dockerfile index 2b2a12aa..f7c9d21f 100644 --- a/examples/sink/log/Dockerfile +++ b/examples/sink/log/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udsink: used for running the udsink vertices -#################################################################################################### -FROM builder AS udsink - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sink/log" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/sink/log/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/source/simple_source/Dockerfile b/examples/source/simple_source/Dockerfile index d07c719f..c015c524 100644 --- a/examples/source/simple_source/Dockerfile +++ b/examples/source/simple_source/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/source/simple_source" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/source/simple_source/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/sourcetransform/async_event_time_filter/Dockerfile b/examples/sourcetransform/async_event_time_filter/Dockerfile index 78f24d83..b087d03c 100644 --- a/examples/sourcetransform/async_event_time_filter/Dockerfile +++ b/examples/sourcetransform/async_event_time_filter/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sourcetransform/async_event_time_filter" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/sourcetransform/async_event_time_filter/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/examples/sourcetransform/event_time_filter/Dockerfile b/examples/sourcetransform/event_time_filter/Dockerfile index 3ed3480b..75cde043 100644 --- a/examples/sourcetransform/event_time_filter/Dockerfile +++ b/examples/sourcetransform/event_time_filter/Dockerfile @@ -34,11 +34,7 @@ RUN apt-get update \ && chmod +x /dumb-init \ && curl -sSL https://install.python-poetry.org | python3 - -#################################################################################################### -# udf: used for running the udf vertices -#################################################################################################### -FROM builder AS udf - +# Copy necessary files and install dependencies WORKDIR $PYSETUP_PATH COPY ./ ./ @@ -47,6 +43,36 @@ RUN poetry lock RUN poetry install --no-cache --no-root && \ rm -rf ~/.cache/pypoetry/ +#################################################################################################### +# udf: used for running the udf vertices +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PYSETUP_PATH="/opt/pysetup" + +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sourcetransform/event_time_filter" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +# Install only runtime system dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + \ + # install dumb-init + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +# Copy only the virtual environment and application code from builder +WORKDIR $PYSETUP_PATH +COPY --from=builder $VENV_PATH $VENV_PATH +COPY examples/sourcetransform/event_time_filter/ $EXAMPLE_PATH/ + +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] diff --git a/pyproject.toml b/pyproject.toml index 74e1c586..e984d21c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ protobuf = ">=3.20,<6.0" aiorun = "^2023.7" uvloop = "^0.19.0" psutil = "^6.0.0" +numpy = "^1.26.0" [tool.poetry.group.dev] optional = true From 59ce33a838c9b461322fd7793fe9895f5350f721 Mon Sep 17 00:00:00 2001 From: sapkota-aayush Date: Sun, 29 Jun 2025 22:27:36 -0400 Subject: [PATCH 2/9] chore: remove numpy and update poetry.lock Signed-off-by: sapkota-aayush --- poetry.lock | 24 ++++++++++++------------ pyproject.toml | 1 - 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index 53bad6b7..ab404238 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "aiorun" @@ -58,7 +58,7 @@ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -304,7 +304,7 @@ files = [ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli"] +toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "distlib" @@ -349,7 +349,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2)"] +typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] [[package]] name = "google-api-core" @@ -372,7 +372,7 @@ requests = ">=2.18.0,<3.0.0.dev0" [package.extras] async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.dev0)"] -grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\""] grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] @@ -1016,13 +1016,13 @@ files = [ ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.8.0)"] -core = ["importlib_metadata (>=6)", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] +core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.14.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] [[package]] name = "tomli" @@ -1093,7 +1093,7 @@ files = [ ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -1141,7 +1141,7 @@ files = [ [package.extras] docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0) ; python_version >= \"3.12\"", "aiohttp (>=3.8.1) ; python_version < \"3.12\"", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] [[package]] name = "virtualenv" @@ -1162,7 +1162,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] [metadata] lock-version = "2.1" diff --git a/pyproject.toml b/pyproject.toml index e984d21c..74e1c586 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,6 @@ protobuf = ">=3.20,<6.0" aiorun = "^2023.7" uvloop = "^0.19.0" psutil = "^6.0.0" -numpy = "^1.26.0" [tool.poetry.group.dev] optional = true From 5162f4a6c4af5e1b6e93401e1d2864a216227003 Mon Sep 17 00:00:00 2001 From: sapkota-aayush Date: Mon, 30 Jun 2025 11:47:09 -0400 Subject: [PATCH 3/9] style: format code with black Signed-off-by: sapkota-aayush --- examples/map/even_odd/example.py | 1 - pynumaflow/proto/mapper/map_pb2.py | 6 +++--- pynumaflow/proto/mapper/map_pb2.pyi | 3 +++ pynumaflow/proto/reducer/reduce_pb2.pyi | 4 ++++ pynumaflow/proto/sinker/sink_pb2.pyi | 3 +++ pynumaflow/proto/sourcer/source_pb2.pyi | 9 +++++++++ pynumaflow/proto/sourcetransformer/transform_pb2.pyi | 3 +++ 7 files changed, 25 insertions(+), 4 deletions(-) diff --git a/examples/map/even_odd/example.py b/examples/map/even_odd/example.py index 92a31708..52405590 100644 --- a/examples/map/even_odd/example.py +++ b/examples/map/even_odd/example.py @@ -21,7 +21,6 @@ def my_handler(keys: list[str], datum: Datum) -> Messages: return messages - if __name__ == "__main__": """ This example shows how to create a simple map function that takes in a diff --git a/pynumaflow/proto/mapper/map_pb2.py b/pynumaflow/proto/mapper/map_pb2.py index 80472786..4929db20 100644 --- a/pynumaflow/proto/mapper/map_pb2.py +++ b/pynumaflow/proto/mapper/map_pb2.py @@ -26,9 +26,9 @@ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "map_pb2", _globals) if _descriptor._USE_C_DESCRIPTORS == False: _globals["DESCRIPTOR"]._options = None - _globals[ - "DESCRIPTOR" - ]._serialized_options = b"Z5github.com/numaproj/numaflow-go/pkg/apis/proto/map/v1" + _globals["DESCRIPTOR"]._serialized_options = ( + b"Z5github.com/numaproj/numaflow-go/pkg/apis/proto/map/v1" + ) _globals["_MAPREQUEST_REQUEST_HEADERSENTRY"]._options = None _globals["_MAPREQUEST_REQUEST_HEADERSENTRY"]._serialized_options = b"8\001" _globals["_MAPREQUEST"]._serialized_start = 84 diff --git a/pynumaflow/proto/mapper/map_pb2.pyi b/pynumaflow/proto/mapper/map_pb2.pyi index 9832bc3e..e1279ff0 100644 --- a/pynumaflow/proto/mapper/map_pb2.pyi +++ b/pynumaflow/proto/mapper/map_pb2.pyi @@ -26,6 +26,7 @@ class MapRequest(_message.Message): key: str value: str def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + KEYS_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] EVENT_TIME_FIELD_NUMBER: _ClassVar[int] @@ -44,6 +45,7 @@ class MapRequest(_message.Message): watermark: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ..., headers: _Optional[_Mapping[str, str]] = ..., ) -> None: ... + REQUEST_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] @@ -89,6 +91,7 @@ class MapResponse(_message.Message): value: _Optional[bytes] = ..., tags: _Optional[_Iterable[str]] = ..., ) -> None: ... + RESULTS_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] diff --git a/pynumaflow/proto/reducer/reduce_pb2.pyi b/pynumaflow/proto/reducer/reduce_pb2.pyi index 88b27d53..980ee0c1 100644 --- a/pynumaflow/proto/reducer/reduce_pb2.pyi +++ b/pynumaflow/proto/reducer/reduce_pb2.pyi @@ -25,6 +25,7 @@ class ReduceRequest(_message.Message): OPEN: _ClassVar[ReduceRequest.WindowOperation.Event] CLOSE: _ClassVar[ReduceRequest.WindowOperation.Event] APPEND: _ClassVar[ReduceRequest.WindowOperation.Event] + OPEN: ReduceRequest.WindowOperation.Event CLOSE: ReduceRequest.WindowOperation.Event APPEND: ReduceRequest.WindowOperation.Event @@ -48,6 +49,7 @@ class ReduceRequest(_message.Message): key: str value: str def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + KEYS_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] EVENT_TIME_FIELD_NUMBER: _ClassVar[int] @@ -66,6 +68,7 @@ class ReduceRequest(_message.Message): watermark: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ..., headers: _Optional[_Mapping[str, str]] = ..., ) -> None: ... + PAYLOAD_FIELD_NUMBER: _ClassVar[int] OPERATION_FIELD_NUMBER: _ClassVar[int] payload: ReduceRequest.Payload @@ -108,6 +111,7 @@ class ReduceResponse(_message.Message): value: _Optional[bytes] = ..., tags: _Optional[_Iterable[str]] = ..., ) -> None: ... + RESULT_FIELD_NUMBER: _ClassVar[int] WINDOW_FIELD_NUMBER: _ClassVar[int] EOF_FIELD_NUMBER: _ClassVar[int] diff --git a/pynumaflow/proto/sinker/sink_pb2.pyi b/pynumaflow/proto/sinker/sink_pb2.pyi index 78926321..18d4d3b6 100644 --- a/pynumaflow/proto/sinker/sink_pb2.pyi +++ b/pynumaflow/proto/sinker/sink_pb2.pyi @@ -37,6 +37,7 @@ class SinkRequest(_message.Message): key: str value: str def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + KEYS_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] EVENT_TIME_FIELD_NUMBER: _ClassVar[int] @@ -58,6 +59,7 @@ class SinkRequest(_message.Message): id: _Optional[str] = ..., headers: _Optional[_Mapping[str, str]] = ..., ) -> None: ... + REQUEST_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] @@ -106,6 +108,7 @@ class SinkResponse(_message.Message): status: _Optional[_Union[Status, str]] = ..., err_msg: _Optional[str] = ..., ) -> None: ... + RESULTS_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] diff --git a/pynumaflow/proto/sourcer/source_pb2.pyi b/pynumaflow/proto/sourcer/source_pb2.pyi index f2cdc70e..22b8f834 100644 --- a/pynumaflow/proto/sourcer/source_pb2.pyi +++ b/pynumaflow/proto/sourcer/source_pb2.pyi @@ -32,6 +32,7 @@ class ReadRequest(_message.Message): def __init__( self, num_records: _Optional[int] = ..., timeout_in_ms: _Optional[int] = ... ) -> None: ... + REQUEST_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] request: ReadRequest.Request @@ -55,6 +56,7 @@ class ReadResponse(_message.Message): key: str value: str def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + PAYLOAD_FIELD_NUMBER: _ClassVar[int] OFFSET_FIELD_NUMBER: _ClassVar[int] EVENT_TIME_FIELD_NUMBER: _ClassVar[int] @@ -81,6 +83,7 @@ class ReadResponse(_message.Message): __slots__ = () SUCCESS: _ClassVar[ReadResponse.Status.Code] FAILURE: _ClassVar[ReadResponse.Status.Code] + SUCCESS: ReadResponse.Status.Code FAILURE: ReadResponse.Status.Code @@ -88,6 +91,7 @@ class ReadResponse(_message.Message): __slots__ = () UNACKED: _ClassVar[ReadResponse.Status.Error] OTHER: _ClassVar[ReadResponse.Status.Error] + UNACKED: ReadResponse.Status.Error OTHER: ReadResponse.Status.Error EOT_FIELD_NUMBER: _ClassVar[int] @@ -105,6 +109,7 @@ class ReadResponse(_message.Message): error: _Optional[_Union[ReadResponse.Status.Error, str]] = ..., msg: _Optional[str] = ..., ) -> None: ... + RESULT_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] @@ -128,6 +133,7 @@ class AckRequest(_message.Message): def __init__( self, offsets: _Optional[_Iterable[_Union[Offset, _Mapping]]] = ... ) -> None: ... + REQUEST_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] request: AckRequest.Request @@ -148,6 +154,7 @@ class AckResponse(_message.Message): def __init__( self, success: _Optional[_Union[_empty_pb2.Empty, _Mapping]] = ... ) -> None: ... + RESULT_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] result: AckResponse.Result @@ -172,6 +179,7 @@ class PendingResponse(_message.Message): COUNT_FIELD_NUMBER: _ClassVar[int] count: int def __init__(self, count: _Optional[int] = ...) -> None: ... + RESULT_FIELD_NUMBER: _ClassVar[int] result: PendingResponse.Result def __init__( @@ -186,6 +194,7 @@ class PartitionsResponse(_message.Message): PARTITIONS_FIELD_NUMBER: _ClassVar[int] partitions: _containers.RepeatedScalarFieldContainer[int] def __init__(self, partitions: _Optional[_Iterable[int]] = ...) -> None: ... + RESULT_FIELD_NUMBER: _ClassVar[int] result: PartitionsResponse.Result def __init__( diff --git a/pynumaflow/proto/sourcetransformer/transform_pb2.pyi b/pynumaflow/proto/sourcetransformer/transform_pb2.pyi index cc8fe420..1fe8cb08 100644 --- a/pynumaflow/proto/sourcetransformer/transform_pb2.pyi +++ b/pynumaflow/proto/sourcetransformer/transform_pb2.pyi @@ -32,6 +32,7 @@ class SourceTransformRequest(_message.Message): key: str value: str def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + KEYS_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] EVENT_TIME_FIELD_NUMBER: _ClassVar[int] @@ -53,6 +54,7 @@ class SourceTransformRequest(_message.Message): headers: _Optional[_Mapping[str, str]] = ..., id: _Optional[str] = ..., ) -> None: ... + REQUEST_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] request: SourceTransformRequest.Request @@ -83,6 +85,7 @@ class SourceTransformResponse(_message.Message): event_time: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ..., tags: _Optional[_Iterable[str]] = ..., ) -> None: ... + RESULTS_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] From 21602f66e10df06f482084d5a6522e8da3b36332 Mon Sep 17 00:00:00 2001 From: sapkota-aayush Date: Mon, 30 Jun 2025 20:00:07 -0400 Subject: [PATCH 4/9] Fix black formattin on proto files to match CI Signed-off-by: sapkota-aayush --- pynumaflow/proto/mapper/map_pb2.py | 6 +++--- pynumaflow/proto/reducer/reduce_pb2.pyi | 1 - pynumaflow/proto/sourcer/source_pb2.pyi | 2 -- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pynumaflow/proto/mapper/map_pb2.py b/pynumaflow/proto/mapper/map_pb2.py index 4929db20..80472786 100644 --- a/pynumaflow/proto/mapper/map_pb2.py +++ b/pynumaflow/proto/mapper/map_pb2.py @@ -26,9 +26,9 @@ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "map_pb2", _globals) if _descriptor._USE_C_DESCRIPTORS == False: _globals["DESCRIPTOR"]._options = None - _globals["DESCRIPTOR"]._serialized_options = ( - b"Z5github.com/numaproj/numaflow-go/pkg/apis/proto/map/v1" - ) + _globals[ + "DESCRIPTOR" + ]._serialized_options = b"Z5github.com/numaproj/numaflow-go/pkg/apis/proto/map/v1" _globals["_MAPREQUEST_REQUEST_HEADERSENTRY"]._options = None _globals["_MAPREQUEST_REQUEST_HEADERSENTRY"]._serialized_options = b"8\001" _globals["_MAPREQUEST"]._serialized_start = 84 diff --git a/pynumaflow/proto/reducer/reduce_pb2.pyi b/pynumaflow/proto/reducer/reduce_pb2.pyi index 980ee0c1..2c4b248c 100644 --- a/pynumaflow/proto/reducer/reduce_pb2.pyi +++ b/pynumaflow/proto/reducer/reduce_pb2.pyi @@ -25,7 +25,6 @@ class ReduceRequest(_message.Message): OPEN: _ClassVar[ReduceRequest.WindowOperation.Event] CLOSE: _ClassVar[ReduceRequest.WindowOperation.Event] APPEND: _ClassVar[ReduceRequest.WindowOperation.Event] - OPEN: ReduceRequest.WindowOperation.Event CLOSE: ReduceRequest.WindowOperation.Event APPEND: ReduceRequest.WindowOperation.Event diff --git a/pynumaflow/proto/sourcer/source_pb2.pyi b/pynumaflow/proto/sourcer/source_pb2.pyi index 22b8f834..8f588410 100644 --- a/pynumaflow/proto/sourcer/source_pb2.pyi +++ b/pynumaflow/proto/sourcer/source_pb2.pyi @@ -83,7 +83,6 @@ class ReadResponse(_message.Message): __slots__ = () SUCCESS: _ClassVar[ReadResponse.Status.Code] FAILURE: _ClassVar[ReadResponse.Status.Code] - SUCCESS: ReadResponse.Status.Code FAILURE: ReadResponse.Status.Code @@ -91,7 +90,6 @@ class ReadResponse(_message.Message): __slots__ = () UNACKED: _ClassVar[ReadResponse.Status.Error] OTHER: _ClassVar[ReadResponse.Status.Error] - UNACKED: ReadResponse.Status.Error OTHER: ReadResponse.Status.Error EOT_FIELD_NUMBER: _ClassVar[int] From 05f70f806e28c8a3f725b01c30352aada10d0e8f Mon Sep 17 00:00:00 2001 From: sapkota-aayush Date: Thu, 3 Jul 2025 15:14:44 -0400 Subject: [PATCH 5/9] Runtime layer added at dockerfile Signed-off-by: sapkota-aayush --- DOCKER_OPTIMIZATION.md | 229 +++++++++++++++++++++++ examples/map/even_odd/Dockerfile | 69 +++---- examples/map/even_odd/Makefile.optimized | 52 +++++ 3 files changed, 306 insertions(+), 44 deletions(-) create mode 100644 DOCKER_OPTIMIZATION.md create mode 100644 examples/map/even_odd/Makefile.optimized diff --git a/DOCKER_OPTIMIZATION.md b/DOCKER_OPTIMIZATION.md new file mode 100644 index 00000000..cc36410f --- /dev/null +++ b/DOCKER_OPTIMIZATION.md @@ -0,0 +1,229 @@ +# Docker Build Optimization for NumaFlow Python UDFs + +## Overview + +This document outlines the optimization strategies to reduce Docker build times for NumaFlow Python UDFs from 2+ minutes to under 30 seconds for subsequent builds. + +## Current Issues + +1. **Redundant dependency installation**: Each UDF rebuilds the entire pynumaflow package +2. **No layer caching**: Dependencies are reinstalled every time +3. **Copying entire project**: The `COPY ./ ./` copies everything, including unnecessary files +4. **No shared base layers**: Each UDF builds its own base environment + +## Optimization Strategy: Three-Stage Approach + +As suggested by @kohlisid, we implement a three-stage build approach: + +### Stage 1: Base Layer +- Common Python environment and tools +- System dependencies (curl, wget, build-essential, git) +- Poetry installation +- dumb-init binary + +### Stage 2: Environment Setup +- pynumaflow package installation +- Shared virtual environment creation +- This layer is cached unless `pyproject.toml` or `poetry.lock` changes + +### Stage 3: Builder +- UDF-specific code and dependencies +- Reuses the pynumaflow installation from Stage 2 +- Minimal additional dependencies + +## Implementation Options + +### Option 1: Optimized Multi-Stage Build (Recommended) + +**File**: `examples/map/even_odd/Dockerfile.optimized` + +**Benefits**: +- Better layer caching +- Reduced build time by ~60-70% +- No external dependencies + +**Usage**: +```bash +cd examples/map/even_odd +make -f Makefile.optimized image +``` + +### Option 2: Shared Base Image (Fastest) + +**Files**: +- `Dockerfile.base` (shared base image) +- `examples/map/even_odd/Dockerfile.shared-base` (UDF-specific) + +**Benefits**: +- Maximum caching efficiency +- Build time reduced by ~80-90% for subsequent builds +- Perfect for CI/CD pipelines + +**Usage**: +```bash +# Build base image once +docker build -f Dockerfile.base -t numaflow-python-base . + +# Build UDF images (very fast) +cd examples/map/even_odd +make -f Makefile.optimized image-fast +``` + +## Performance Comparison + +| Approach | First Build | Subsequent Builds | Cache Efficiency | +|----------|-------------|-------------------|------------------| +| Current | ~2-3 minutes | ~2-3 minutes | Poor | +| Optimized Multi-Stage | ~2-3 minutes | ~45-60 seconds | Good | +| Shared Base Image | ~2-3 minutes | ~15-30 seconds | Excellent | + +## Implementation Steps + +### 1. Build Shared Base Image (One-time setup) + +```bash +# From project root +docker build -f Dockerfile.base -t numaflow-python-base . +``` + +### 2. Update UDF Dockerfiles + +Replace the current Dockerfile with the optimized version: + +```bash +# For each UDF directory +cp Dockerfile.optimized Dockerfile +# or +cp Dockerfile.shared-base Dockerfile +``` + +### 3. Update Makefiles + +Use the optimized Makefile: + +```bash +# For each UDF directory +cp Makefile.optimized Makefile +``` + +### 4. CI/CD Integration + +For CI/CD pipelines, add the base image build step: + +```yaml +# Example GitHub Actions step +- name: Build base image + run: docker build -f Dockerfile.base -t numaflow-python-base . + +- name: Build UDF images + run: | + cd examples/map/even_odd + make image-fast +``` + +## Advanced Optimizations + +### 1. Dependency Caching + +The optimized Dockerfiles implement smart dependency caching: +- `pyproject.toml` and `poetry.lock` are copied first +- pynumaflow installation is cached separately +- UDF-specific dependencies are installed last + +### 2. Layer Optimization + +- Minimal system dependencies in runtime image +- Separate build and runtime stages +- Efficient file copying with specific paths + +### 3. Build Context Optimization + +- Copy only necessary files +- Use `.dockerignore` to exclude unnecessary files +- Minimize build context size + +## Migration Guide + +### For Existing UDFs + +1. **Backup current Dockerfile**: + ```bash + cp Dockerfile Dockerfile.backup + ``` + +2. **Choose optimization approach**: + - For single UDF: Use `Dockerfile.optimized` + - For multiple UDFs: Use `Dockerfile.shared-base` + +3. **Update Makefile**: + ```bash + cp Makefile.optimized Makefile + ``` + +4. **Test the build**: + ```bash + make image + # or + make image-fast + ``` + +### For New UDFs + +1. **Use the optimized template**: + ```bash + cp examples/map/even_odd/Dockerfile.optimized your-udf/Dockerfile + cp examples/map/even_odd/Makefile.optimized your-udf/Makefile + ``` + +2. **Update paths in Dockerfile**: + - Change `EXAMPLE_PATH` to your UDF path + - Update `COPY` commands accordingly + +## Troubleshooting + +### Common Issues + +1. **Base image not found**: + ```bash + docker build -f Dockerfile.base -t numaflow-python-base . + ``` + +2. **Permission issues**: + ```bash + chmod +x entry.sh + ``` + +3. **Poetry cache issues**: + ```bash + poetry cache clear --all pypi + ``` + +### Performance Monitoring + +Monitor build times: +```bash +time make image +time make image-fast +``` + +## Future Enhancements + +1. **Registry-based base images**: Push base image to registry for team sharing +2. **BuildKit optimizations**: Enable BuildKit for parallel layer building +3. **Multi-platform builds**: Optimize for ARM64 and AMD64 +4. **Dependency analysis**: Automate dependency optimization + +## Contributing + +When adding new UDFs or modifying existing ones: + +1. Use the optimized Dockerfile templates +2. Follow the three-stage approach +3. Test build times before and after changes +4. Update this documentation if needed + +## References + +- [Docker Multi-Stage Builds](https://docs.docker.com/develop/dev-best-practices/multistage-build/) +- [Docker Layer Caching](https://docs.docker.com/develop/dev-best-practices/dockerfile_best-practices/#leverage-build-cache) +- [Poetry Docker Best Practices](https://python-poetry.org/docs/configuration/#virtualenvsin-project) \ No newline at end of file diff --git a/examples/map/even_odd/Dockerfile b/examples/map/even_odd/Dockerfile index 4ad0ab20..1bf155ca 100644 --- a/examples/map/even_odd/Dockerfile +++ b/examples/map/even_odd/Dockerfile @@ -1,70 +1,51 @@ #################################################################################################### -# Stage 1: Builder - installs dependencies using poetry +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### -FROM python:3.10-slim-bullseye AS builder +FROM python:3.10-slim-bullseye AS base-builder -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/even_odd" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -# Install build dependencies and poetry +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN echo "Simulating long build step..." && sleep 20 RUN apt-get update && apt-get install --no-install-recommends -y \ - curl \ - wget \ - build-essential \ - git \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy project into builder -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/map/even_odd" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true -# Install deps WORKDIR $EXAMPLE_PATH -RUN poetry lock && \ - poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/map/even_odd/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# Stage 2: UDF Runtime - clean container with only needed stuff +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/even_odd" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy virtual environment and entry script WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY --from=builder $EXAMPLE_PATH $EXAMPLE_PATH +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/map/even_odd/Makefile.optimized b/examples/map/even_odd/Makefile.optimized new file mode 100644 index 00000000..3e33cc03 --- /dev/null +++ b/examples/map/even_odd/Makefile.optimized @@ -0,0 +1,52 @@ +TAG ?= stable +PUSH ?= false +IMAGE_REGISTRY = quay.io/numaio/numaflow-python/even-odd:${TAG} +DOCKER_FILE_PATH = examples/map/even_odd/Dockerfile.optimized +BASE_IMAGE_NAME = numaflow-python-base + +.PHONY: base-image +base-image: + @echo "Building shared base image..." + docker build -f Dockerfile.base -t ${BASE_IMAGE_NAME} . + +.PHONY: update +update: + poetry update -vv + +.PHONY: image-push +image-push: base-image update + cd ../../../ && docker buildx build \ + -f ${DOCKER_FILE_PATH} \ + -t ${IMAGE_REGISTRY} \ + --platform linux/amd64,linux/arm64 . --push + +.PHONY: image +image: base-image update + cd ../../../ && docker build \ + -f ${DOCKER_FILE_PATH} \ + -t ${IMAGE_REGISTRY} . + @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi + +.PHONY: image-fast +image-fast: update + @echo "Building with shared base image (fastest option)..." + cd ../../../ && docker build \ + -f examples/map/even_odd/Dockerfile.shared-base \ + -t ${IMAGE_REGISTRY} . + @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi + +.PHONY: clean +clean: + docker rmi ${BASE_IMAGE_NAME} 2>/dev/null || true + docker rmi ${IMAGE_REGISTRY} 2>/dev/null || true + +.PHONY: help +help: + @echo "Available targets:" + @echo " base-image - Build the shared base image with pynumaflow" + @echo " image - Build UDF image with optimized multi-stage build" + @echo " image-fast - Build UDF image using shared base (fastest)" + @echo " image-push - Build and push multi-platform image" + @echo " update - Update poetry dependencies" + @echo " clean - Remove built images" + @echo " help - Show this help message" \ No newline at end of file From 6ef311772312e0ce43f429f739084c05626da31a Mon Sep 17 00:00:00 2001 From: sapkota-aayush Date: Thu, 3 Jul 2025 20:54:03 -0400 Subject: [PATCH 6/9] Refactor: Standardize all example Dockerfiles to 3-layer pattern and fix cross-platform issues Signed-off-by: sapkota-aayush --- examples/batchmap/flatmap/Dockerfile | 76 ++++++----------- examples/map/flatmap/Dockerfile | 76 ++++++----------- examples/map/forward_message/Dockerfile | 76 ++++++----------- examples/map/multiproc_map/Dockerfile | 76 ++++++----------- examples/mapstream/flatmap_stream/Dockerfile | 76 ++++++----------- examples/reduce/asyncio_reduce/Dockerfile | 81 +++++++------------ examples/reduce/asyncio_reduce/pyproject.toml | 2 +- examples/reduce/batchmap/flatmap/Dockerfile | 55 +++++++++++++ examples/reduce/counter/Dockerfile | 76 ++++++----------- examples/reducestream/counter/Dockerfile | 76 ++++++----------- examples/reducestream/sum/Dockerfile | 76 ++++++----------- .../sideinput/simple_sideinput/Dockerfile | 76 ++++++----------- .../sideinput/simple_sideinput/udf/Dockerfile | 76 ++++++----------- examples/sink/async_log/Dockerfile | 76 ++++++----------- examples/sink/log/Dockerfile | 76 ++++++----------- examples/source/simple_source/Dockerfile | 76 ++++++----------- .../async_event_time_filter/Dockerfile | 76 ++++++----------- .../event_time_filter/Dockerfile | 76 ++++++----------- poetry.lock | 3 +- pyproject.toml | 5 +- 20 files changed, 465 insertions(+), 821 deletions(-) create mode 100644 examples/reduce/batchmap/flatmap/Dockerfile diff --git a/examples/batchmap/flatmap/Dockerfile b/examples/batchmap/flatmap/Dockerfile index f9bb239c..99319c4a 100644 --- a/examples/batchmap/flatmap/Dockerfile +++ b/examples/batchmap/flatmap/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/batchmap/flatmap" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/batchmap/flatmap" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/batchmap/flatmap/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/batchmap/flatmap" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/batchmap/flatmap/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/map/flatmap/Dockerfile b/examples/map/flatmap/Dockerfile index 56ccc0db..22d744c0 100644 --- a/examples/map/flatmap/Dockerfile +++ b/examples/map/flatmap/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/flatmap" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/map/flatmap" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/map/flatmap/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/flatmap" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/map/flatmap/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/map/forward_message/Dockerfile b/examples/map/forward_message/Dockerfile index 442e4d05..464fc1fc 100644 --- a/examples/map/forward_message/Dockerfile +++ b/examples/map/forward_message/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/forward_message" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/map/forward_message" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/map/forward_message/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/forward_message" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/map/forward_message/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/map/multiproc_map/Dockerfile b/examples/map/multiproc_map/Dockerfile index 7b878294..3c6e8205 100644 --- a/examples/map/multiproc_map/Dockerfile +++ b/examples/map/multiproc_map/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/multiproc_map" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/map/multiproc_map" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/map/multiproc_map/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/multiproc_map" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/map/multiproc_map/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/mapstream/flatmap_stream/Dockerfile b/examples/mapstream/flatmap_stream/Dockerfile index 25e52a05..e56d7fb5 100644 --- a/examples/mapstream/flatmap_stream/Dockerfile +++ b/examples/mapstream/flatmap_stream/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/mapstream/flatmap_stream" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/mapstream/flatmap_stream" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/mapstream/flatmap_stream/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/mapstream/flatmap_stream" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/mapstream/flatmap_stream/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/reduce/asyncio_reduce/Dockerfile b/examples/reduce/asyncio_reduce/Dockerfile index 72da7ea4..e74b6036 100644 --- a/examples/reduce/asyncio_reduce/Dockerfile +++ b/examples/reduce/asyncio_reduce/Dockerfile @@ -1,76 +1,55 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" \ - VENV_PATH="/opt/pysetup/.venv" +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && pip install poetry \ + && poetry install --no-root --no-interaction -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ - && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - +ENV EXAMPLE_PATH="/opt/pysetup/examples/reduce/asyncio_reduce" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY pyproject.toml ./ -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +WORKDIR $EXAMPLE_PATH +COPY examples/reduce/asyncio_reduce/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" \ - VENV_PATH="/opt/pysetup/.venv" - +ENV PYSETUP_PATH="/opt/pysetup" +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reduce/asyncio_reduce" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -ADD . /app -WORKDIR /app +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH +WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh ENTRYPOINT ["/dumb-init", "--"] -CMD ["/app/entry.sh"] +CMD ["sh", "-c", "$EXAMPLE_PATH/entry.sh"] EXPOSE 5000 diff --git a/examples/reduce/asyncio_reduce/pyproject.toml b/examples/reduce/asyncio_reduce/pyproject.toml index 31cce969..cac90449 100644 --- a/examples/reduce/asyncio_reduce/pyproject.toml +++ b/examples/reduce/asyncio_reduce/pyproject.toml @@ -7,7 +7,7 @@ authors = ["Numaflow developers"] [tool.poetry.dependencies] python = "~3.10" pynumaflow = "~0.6.0" -aiorun = "^2022.11.1" +aiorun = ">=2023.7,<2024.0" aiohttp = "~3.8.4" asyncio = "~3.4.3" diff --git a/examples/reduce/batchmap/flatmap/Dockerfile b/examples/reduce/batchmap/flatmap/Dockerfile new file mode 100644 index 00000000..a70d0d90 --- /dev/null +++ b/examples/reduce/batchmap/flatmap/Dockerfile @@ -0,0 +1,55 @@ +#################################################################################################### +# Stage 1: Base Builder - installs core dependencies using poetry +#################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder + +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH + +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && pip install poetry \ + && poetry install --no-root --no-interaction + +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/reduce/batchmap/flatmap" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true + +WORKDIR $EXAMPLE_PATH +COPY examples/reduce/batchmap/flatmap/ ./ +RUN poetry install --no-root --no-interaction + +#################################################################################################### +# Stage 3: UDF Runtime - clean container with only needed stuff +#################################################################################################### +FROM python:3.10-slim-bullseye AS udf + +ENV PYSETUP_PATH="/opt/pysetup" +ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reduce/batchmap/flatmap" +ENV VENV_PATH="$EXAMPLE_PATH/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +RUN apt-get update && apt-get install --no-install-recommends -y wget \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ + && chmod +x /dumb-init + +WORKDIR $PYSETUP_PATH +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH + +WORKDIR $EXAMPLE_PATH +RUN chmod +x entry.sh + +ENTRYPOINT ["/dumb-init", "--"] +CMD ["sh", "-c", "$EXAMPLE_PATH/entry.sh"] + +EXPOSE 5000 \ No newline at end of file diff --git a/examples/reduce/counter/Dockerfile b/examples/reduce/counter/Dockerfile index a99be083..f25a9c46 100644 --- a/examples/reduce/counter/Dockerfile +++ b/examples/reduce/counter/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reduce/counter" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/reduce/counter" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/reduce/counter/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reduce/counter" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/reduce/counter/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/reducestream/counter/Dockerfile b/examples/reducestream/counter/Dockerfile index c67cebff..f26543d7 100644 --- a/examples/reducestream/counter/Dockerfile +++ b/examples/reducestream/counter/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reducestream/counter" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/reducestream/counter" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/reducestream/counter/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reducestream/counter" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/reducestream/counter/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/reducestream/sum/Dockerfile b/examples/reducestream/sum/Dockerfile index 2823efb9..4b372b78 100644 --- a/examples/reducestream/sum/Dockerfile +++ b/examples/reducestream/sum/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reducestream/sum" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/reducestream/sum" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/reducestream/sum/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/reducestream/sum" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/reducestream/sum/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/sideinput/simple_sideinput/Dockerfile b/examples/sideinput/simple_sideinput/Dockerfile index 3cc98025..47085100 100644 --- a/examples/sideinput/simple_sideinput/Dockerfile +++ b/examples/sideinput/simple_sideinput/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sideinput/simple_sideinput" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/sideinput/simple_sideinput" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/sideinput/simple_sideinput/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sideinput/simple_sideinput" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/sideinput/simple_sideinput/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/sideinput/simple_sideinput/udf/Dockerfile b/examples/sideinput/simple_sideinput/udf/Dockerfile index 5bc7cbaf..50cc8578 100644 --- a/examples/sideinput/simple_sideinput/udf/Dockerfile +++ b/examples/sideinput/simple_sideinput/udf/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sideinput/simple_sideinput/udf" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/sideinput/simple_sideinput/udf" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/sideinput/simple_sideinput/udf/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sideinput/simple_sideinput/udf" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/sideinput/simple_sideinput/udf/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/sink/async_log/Dockerfile b/examples/sink/async_log/Dockerfile index a8a53f9a..4448c3a8 100644 --- a/examples/sink/async_log/Dockerfile +++ b/examples/sink/async_log/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sink/async_log" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/sink/async_log" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/sink/async_log/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sink/async_log" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/sink/async_log/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/sink/log/Dockerfile b/examples/sink/log/Dockerfile index f7c9d21f..0c927395 100644 --- a/examples/sink/log/Dockerfile +++ b/examples/sink/log/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sink/log" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/sink/log" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/sink/log/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sink/log" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/sink/log/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/source/simple_source/Dockerfile b/examples/source/simple_source/Dockerfile index c015c524..ca33fee3 100644 --- a/examples/source/simple_source/Dockerfile +++ b/examples/source/simple_source/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/source/simple_source" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/source/simple_source" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/source/simple_source/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/source/simple_source" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/source/simple_source/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/sourcetransform/async_event_time_filter/Dockerfile b/examples/sourcetransform/async_event_time_filter/Dockerfile index b087d03c..26e66415 100644 --- a/examples/sourcetransform/async_event_time_filter/Dockerfile +++ b/examples/sourcetransform/async_event_time_filter/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sourcetransform/async_event_time_filter" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/sourcetransform/async_event_time_filter" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/sourcetransform/async_event_time_filter/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sourcetransform/async_event_time_filter" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/sourcetransform/async_event_time_filter/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/examples/sourcetransform/event_time_filter/Dockerfile b/examples/sourcetransform/event_time_filter/Dockerfile index 75cde043..9e702ecf 100644 --- a/examples/sourcetransform/event_time_filter/Dockerfile +++ b/examples/sourcetransform/event_time_filter/Dockerfile @@ -1,76 +1,50 @@ #################################################################################################### -# builder: install needed dependencies +# Stage 1: Base Builder - installs core dependencies using poetry #################################################################################################### +FROM python:3.10-slim-bullseye AS base-builder -FROM python:3.10-slim-bullseye AS builder - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=on \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.2.2 \ - POETRY_HOME="/opt/poetry" \ - POETRY_VIRTUALENVS_IN_PROJECT=true \ - POETRY_NO_INTERACTION=1 \ - PYSETUP_PATH="/opt/pysetup" - -ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sourcetransform/event_time_filter" -ENV VENV_PATH="$EXAMPLE_PATH/.venv" -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PYSETUP_PATH="/opt/pysetup" +WORKDIR $PYSETUP_PATH -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - wget \ - # deps for building python deps - build-essential \ - && apt-get install -y git \ +# Copy only core dependency files first for better caching +COPY pyproject.toml poetry.lock README.md ./ +COPY pynumaflow/ ./pynumaflow/ +RUN apt-get update && apt-get install --no-install-recommends -y \ + curl wget build-essential git \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init - && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ - && chmod +x /dumb-init \ - && curl -sSL https://install.python-poetry.org | python3 - + && pip install poetry \ + && poetry install --no-root --no-interaction -# Copy necessary files and install dependencies -WORKDIR $PYSETUP_PATH -COPY ./ ./ +#################################################################################################### +# Stage 2: UDF Builder - adds UDF code and installs UDF-specific deps +#################################################################################################### +FROM base-builder AS udf-builder + +ENV EXAMPLE_PATH="/opt/pysetup/examples/sourcetransform/event_time_filter" +ENV POETRY_VIRTUALENVS_IN_PROJECT=true WORKDIR $EXAMPLE_PATH -RUN poetry lock -RUN poetry install --no-cache --no-root && \ - rm -rf ~/.cache/pypoetry/ +COPY examples/sourcetransform/event_time_filter/ ./ +RUN poetry install --no-root --no-interaction #################################################################################################### -# udf: used for running the udf vertices +# Stage 3: UDF Runtime - clean container with only needed stuff #################################################################################################### FROM python:3.10-slim-bullseye AS udf -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PYSETUP_PATH="/opt/pysetup" - +ENV PYSETUP_PATH="/opt/pysetup" ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/sourcetransform/event_time_filter" ENV VENV_PATH="$EXAMPLE_PATH/.venv" ENV PATH="$VENV_PATH/bin:$PATH" -# Install only runtime system dependencies -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - wget \ +RUN apt-get update && apt-get install --no-install-recommends -y wget \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ - \ - # install dumb-init && wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \ && chmod +x /dumb-init -# Copy only the virtual environment and application code from builder WORKDIR $PYSETUP_PATH -COPY --from=builder $VENV_PATH $VENV_PATH -COPY examples/sourcetransform/event_time_filter/ $EXAMPLE_PATH/ +COPY --from=udf-builder $VENV_PATH $VENV_PATH +COPY --from=udf-builder $EXAMPLE_PATH $EXAMPLE_PATH WORKDIR $EXAMPLE_PATH RUN chmod +x entry.sh diff --git a/poetry.lock b/poetry.lock index ab404238..bddc755c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1105,6 +1105,7 @@ description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" groups = ["main"] +markers = "sys_platform == \"linux\"" files = [ {file = "uvloop-0.19.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de4313d7f575474c8f5a12e163f6d89c0a878bc49219641d49e6f1444369a90e"}, {file = "uvloop-0.19.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5588bd21cf1fcf06bded085f37e43ce0e00424197e7c10e77afd4bbefffef428"}, @@ -1167,4 +1168,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.1" python-versions = ">=3.9, <3.13" -content-hash = "afdf2080de75a2057f967d51fb0e96189ea51ce3a2970f3ee623701c6d4a70d7" +content-hash = "d3cdb3d6868fa027b722920ebd118d52f11334101eb2d448348a9b46c386629d" diff --git a/pyproject.toml b/pyproject.toml index 74e1c586..7c3a95c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ google-api-core = "^2.11.0" grpcio-status = "^1.48.1" protobuf = ">=3.20,<6.0" aiorun = "^2023.7" -uvloop = "^0.19.0" +uvloop = { version = "^0.19.0", markers = "sys_platform == 'linux'" } psutil = "^6.0.0" [tool.poetry.group.dev] @@ -80,7 +80,6 @@ extend-exclude = [ "*_pb2*.py", "*.pyi" ] -output-format = "full" -[tool.ruff.lint] +format = "grouped" select = ["E", "F", "UP"] From 7b6386182c48674f5f3f9f20f653d481c8c81bca Mon Sep 17 00:00:00 2001 From: sapkota-aayush Date: Fri, 4 Jul 2025 10:19:27 -0400 Subject: [PATCH 7/9] fix: remove invalid ruff config from pyproject.toml to resolve lint error Signed-off-by: sapkota-aayush --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7c3a95c7..52f4ccde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,5 @@ extend-exclude = [ "*_pb2*.py", "*.pyi" ] -format = "grouped" select = ["E", "F", "UP"] From a606ed9c246c381bf06b46d41fb83916aec1e5fa Mon Sep 17 00:00:00 2001 From: sapkota-aayush Date: Fri, 4 Jul 2025 14:53:45 -0400 Subject: [PATCH 8/9] chore: resolve Ruff config and dependency feedback, ready for review Signed-off-by: sapkota-aayush --- poetry.lock | 3 +-- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index bddc755c..ab404238 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1105,7 +1105,6 @@ description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "sys_platform == \"linux\"" files = [ {file = "uvloop-0.19.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de4313d7f575474c8f5a12e163f6d89c0a878bc49219641d49e6f1444369a90e"}, {file = "uvloop-0.19.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5588bd21cf1fcf06bded085f37e43ce0e00424197e7c10e77afd4bbefffef428"}, @@ -1168,4 +1167,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.1" python-versions = ">=3.9, <3.13" -content-hash = "d3cdb3d6868fa027b722920ebd118d52f11334101eb2d448348a9b46c386629d" +content-hash = "afdf2080de75a2057f967d51fb0e96189ea51ce3a2970f3ee623701c6d4a70d7" diff --git a/pyproject.toml b/pyproject.toml index 52f4ccde..41816b73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ google-api-core = "^2.11.0" grpcio-status = "^1.48.1" protobuf = ">=3.20,<6.0" aiorun = "^2023.7" -uvloop = { version = "^0.19.0", markers = "sys_platform == 'linux'" } +uvloop = "^0.19.0" psutil = "^6.0.0" [tool.poetry.group.dev] From 05a56497863801634a8e1d77101fcf5bbb4dad43 Mon Sep 17 00:00:00 2001 From: sapkota-aayush Date: Mon, 7 Jul 2025 22:46:27 -0400 Subject: [PATCH 9/9] chore(docs): move DOCKER_OPTIMIZATION.md to docs directory as requested Signed-off-by: sapkota-aayush --- DOCKER_OPTIMIZATION.md => docs/DOCKER_OPTIMIZATION.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename DOCKER_OPTIMIZATION.md => docs/DOCKER_OPTIMIZATION.md (100%) diff --git a/DOCKER_OPTIMIZATION.md b/docs/DOCKER_OPTIMIZATION.md similarity index 100% rename from DOCKER_OPTIMIZATION.md rename to docs/DOCKER_OPTIMIZATION.md