From 9ebae2d7bedd148221206e930c6fde940aeded10 Mon Sep 17 00:00:00 2001 From: Jon Davies Date: Thu, 23 Aug 2018 15:31:00 +0100 Subject: [PATCH 1/2] [AIRFLOW-2947] Removed Kubernetes manifests. --- scripts/ci/kubernetes/kube/airflow.yaml | 154 --------- scripts/ci/kubernetes/kube/configmaps.yaml | 365 --------------------- scripts/ci/kubernetes/kube/deploy.sh | 56 ---- scripts/ci/kubernetes/kube/postgres.yaml | 88 ----- scripts/ci/kubernetes/kube/secrets.yaml | 25 -- scripts/ci/kubernetes/kube/volumes.yaml | 87 ----- 6 files changed, 775 deletions(-) delete mode 100644 scripts/ci/kubernetes/kube/airflow.yaml delete mode 100644 scripts/ci/kubernetes/kube/configmaps.yaml delete mode 100755 scripts/ci/kubernetes/kube/deploy.sh delete mode 100644 scripts/ci/kubernetes/kube/postgres.yaml delete mode 100644 scripts/ci/kubernetes/kube/secrets.yaml delete mode 100644 scripts/ci/kubernetes/kube/volumes.yaml diff --git a/scripts/ci/kubernetes/kube/airflow.yaml b/scripts/ci/kubernetes/kube/airflow.yaml deleted file mode 100644 index 4f451ba44a687..0000000000000 --- a/scripts/ci/kubernetes/kube/airflow.yaml +++ /dev/null @@ -1,154 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one * -# or more contributor license agreements. See the NOTICE file * -# distributed with this work for additional information * -# regarding copyright ownership. The ASF licenses this file * -# to you under the Apache License, Version 2.0 (the * -# "License"); you may not use this file except in compliance * -# with the License. You may obtain a copy of the License at * -# * -# http://www.apache.org/licenses/LICENSE-2.0 * -# * -# Unless required by applicable law or agreed to in writing, * -# software distributed under the License is distributed on an * -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * -# KIND, either express or implied. See the License for the * -# specific language governing permissions and limitations * -# under the License. 
* - -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - name: admin-rbac -subjects: - - kind: ServiceAccount - # Reference to upper's `metadata.name` - name: default - # Reference to upper's `metadata.namespace` - namespace: default -roleRef: - kind: ClusterRole - name: cluster-admin - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: airflow -spec: - replicas: 1 - template: - metadata: - labels: - name: airflow - spec: - initContainers: - - name: "init" - image: airflow - imagePullPolicy: IfNotPresent - volumeMounts: - - name: airflow-configmap - mountPath: /root/airflow/airflow.cfg - subPath: airflow.cfg - - name: airflow-dags - mountPath: /root/airflow/dags - - name: test-volume - mountPath: /root/test_volume - env: - - name: SQL_ALCHEMY_CONN - valueFrom: - secretKeyRef: - name: airflow-secrets - key: sql_alchemy_conn - command: - - "bash" - args: - - "-cx" - - "./tmp/airflow-test-env-init.sh" - containers: - - name: webserver - image: airflow - imagePullPolicy: IfNotPresent - ports: - - name: webserver - containerPort: 8080 - args: ["webserver"] - env: - - name: AIRFLOW_KUBE_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: SQL_ALCHEMY_CONN - valueFrom: - secretKeyRef: - name: airflow-secrets - key: sql_alchemy_conn - volumeMounts: - - name: airflow-configmap - mountPath: /root/airflow/airflow.cfg - subPath: airflow.cfg - - name: airflow-dags - mountPath: /root/airflow/dags - - name: airflow-logs - mountPath: /root/airflow/logs -# readinessProbe: -# initialDelaySeconds: 5 -# timeoutSeconds: 5 -# periodSeconds: 5 -# httpGet: -# path: /login -# port: 8080 -# livenessProbe: -# initialDelaySeconds: 5 -# timeoutSeconds: 5 -# failureThreshold: 5 -# httpGet: -# path: /login -# port: 8080 - - name: scheduler - image: airflow - imagePullPolicy: IfNotPresent - args: ["scheduler"] - env: - - name: AIRFLOW_KUBE_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: SQL_ALCHEMY_CONN - valueFrom: - secretKeyRef: - name: airflow-secrets - key: sql_alchemy_conn - volumeMounts: - - name: airflow-configmap - mountPath: /root/airflow/airflow.cfg - subPath: airflow.cfg - - name: airflow-dags - mountPath: /root/airflow/dags - - name: airflow-logs - mountPath: /root/airflow/logs - volumes: - - name: airflow-dags - persistentVolumeClaim: - claimName: airflow-dags - - name: test-volume - persistentVolumeClaim: - claimName: test-volume - - name: airflow-logs - persistentVolumeClaim: - claimName: airflow-logs - - name: airflow-configmap - configMap: - name: airflow-configmap ---- -apiVersion: v1 -kind: Service -metadata: - name: airflow -spec: - type: NodePort - ports: - - port: 8080 - nodePort: 30809 - selector: - name: airflow - diff --git a/scripts/ci/kubernetes/kube/configmaps.yaml b/scripts/ci/kubernetes/kube/configmaps.yaml deleted file mode 100644 index f8e99778f59d8..0000000000000 --- a/scripts/ci/kubernetes/kube/configmaps.yaml +++ /dev/null @@ -1,365 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one * -# or more contributor license agreements. See the NOTICE file * -# distributed with this work for additional information * -# regarding copyright ownership. The ASF licenses this file * -# to you under the Apache License, Version 2.0 (the * -# "License"); you may not use this file except in compliance * -# with the License. 
You may obtain a copy of the License at * -# * -# http://www.apache.org/licenses/LICENSE-2.0 * -# * -# Unless required by applicable law or agreed to in writing, * -# software distributed under the License is distributed on an * -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * -# KIND, either express or implied. See the License for the * -# specific language governing permissions and limitations * -# under the License. * -apiVersion: v1 -kind: ConfigMap -metadata: - name: airflow-configmap -data: - airflow.cfg: | - [core] - airflow_home = /root/airflow - dags_folder = /root/airflow/dags - base_log_folder = /root/airflow/logs - logging_level = INFO - executor = KubernetesExecutor - parallelism = 32 - load_examples = True - plugins_folder = /root/airflow/plugins - sql_alchemy_conn = $SQL_ALCHEMY_CONN - - [scheduler] - dag_dir_list_interval = 300 - child_process_log_directory = /root/airflow/logs/scheduler - # Task instances listen for external kill signal (when you clear tasks - # from the CLI or the UI), this defines the frequency at which they should - # listen (in seconds). - job_heartbeat_sec = 5 - max_threads = 2 - - # The scheduler constantly tries to trigger new tasks (look at the - # scheduler section in the docs for more information). This defines - # how often the scheduler should run (in seconds). - scheduler_heartbeat_sec = 5 - - # after how much time should the scheduler terminate in seconds - # -1 indicates to run continuously (see also num_runs) - run_duration = -1 - - # after how much time a new DAGs should be picked up from the filesystem - min_file_process_interval = 0 - - statsd_on = False - statsd_host = localhost - statsd_port = 8125 - statsd_prefix = airflow - - print_stats_interval = 30 - scheduler_zombie_task_threshold = 300 - max_tis_per_query = 0 - authenticate = False - - # Turn off scheduler catchup by setting this to False. - # Default behavior is unchanged and - # Command Line Backfills still work, but the scheduler - # will not do scheduler catchup if this is False, - # however it can be set on a per DAG basis in the - # DAG definition (catchup) - catchup_by_default = True - - [webserver] - # The base url of your website as airflow cannot guess what domain or - # cname you are using. This is used in automated emails that - # airflow sends to point links to the right web server - base_url = http://localhost:8080 - - # The ip specified when starting the web server - web_server_host = 0.0.0.0 - - # The port on which to run the web server - web_server_port = 8080 - - # Paths to the SSL certificate and key for the web server. When both are - # provided SSL will be enabled. This does not change the web server port. - web_server_ssl_cert = - web_server_ssl_key = - - # Number of seconds the webserver waits before killing gunicorn master that doesn't respond - web_server_master_timeout = 120 - - # Number of seconds the gunicorn webserver waits before timing out on a worker - web_server_worker_timeout = 120 - - # Number of workers to refresh at a time. When set to 0, worker refresh is - # disabled. When nonzero, airflow periodically refreshes webserver workers by - # bringing up new ones and killing old ones. - worker_refresh_batch_size = 1 - - # Number of seconds to wait before refreshing a batch of workers. - worker_refresh_interval = 30 - - # Secret key used to run your flask app - secret_key = temporary_key - - # Number of workers to run the Gunicorn web server - workers = 4 - - # The worker class gunicorn should use. 
Choices include - # sync (default), eventlet, gevent - worker_class = sync - - # Log files for the gunicorn webserver. '-' means log to stderr. - access_logfile = - - error_logfile = - - - # Expose the configuration file in the web server - expose_config = False - - # Set to true to turn on authentication: - # https://airflow.incubator.apache.org/security.html#web-authentication - authenticate = False - - # Filter the list of dags by owner name (requires authentication to be enabled) - filter_by_owner = False - - # Filtering mode. Choices include user (default) and ldapgroup. - # Ldap group filtering requires using the ldap backend - # - # Note that the ldap server needs the "memberOf" overlay to be set up - # in order to user the ldapgroup mode. - owner_mode = user - - # Default DAG view. Valid values are: - # tree, graph, duration, gantt, landing_times - dag_default_view = tree - - # Default DAG orientation. Valid values are: - # LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top) - dag_orientation = LR - - # Puts the webserver in demonstration mode; blurs the names of Operators for - # privacy. - demo_mode = False - - # The amount of time (in secs) webserver will wait for initial handshake - # while fetching logs from other worker machine - log_fetch_timeout_sec = 5 - - # By default, the webserver shows paused DAGs. Flip this to hide paused - # DAGs by default - hide_paused_dags_by_default = False - - # Consistent page size across all listing views in the UI - page_size = 100 - - # Use FAB-based webserver with RBAC feature - rbac = True - - [smtp] - # If you want airflow to send emails on retries, failure, and you want to use - # the airflow.utils.email.send_email_smtp function, you have to configure an - # smtp server here - smtp_host = localhost - smtp_starttls = True - smtp_ssl = False - # Uncomment and set the user/pass settings if you want to use SMTP AUTH - # smtp_user = airflow - # smtp_password = airflow - smtp_port = 25 - smtp_mail_from = airflow@example.com - - [kubernetes] - airflow_configmap = airflow-configmap - worker_container_repository = airflow - worker_container_tag = latest - worker_container_image_pull_policy = IfNotPresent - worker_dags_folder = /tmp/dags - delete_worker_pods = True - git_repo = https://github.com/apache/incubator-airflow.git - git_branch = master - git_subpath = airflow/example_dags/ - git_user = - git_password = - dags_volume_claim = airflow-dags - dags_volume_subpath = - logs_volume_claim = airflow-logs - logs_volume_subpath = - in_cluster = True - namespace = default - gcp_service_account_keys = - - # For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync - git_sync_container_repository = gcr.io/google-containers/git-sync-amd64 - git_sync_container_tag = v2.0.5 - git_sync_init_container_name = git-sync-clone - - [kubernetes_node_selectors] - # The Key-value pairs to be given to worker pods. - # The worker pods will be scheduled to the nodes of the specified key-value pairs. 
- # Should be supplied in the format: key = value - - [kubernetes_secrets] - SQL_ALCHEMY_CONN = airflow-secrets=sql_alchemy_conn - - [hive] - # Default mapreduce queue for HiveOperator tasks - default_hive_mapred_queue = - - [celery] - # This section only applies if you are using the CeleryExecutor in - # [core] section above - - # The app name that will be used by celery - celery_app_name = airflow.executors.celery_executor - - # The concurrency that will be used when starting workers with the - # "airflow worker" command. This defines the number of task instances that - # a worker will take, so size up your workers based on the resources on - # your worker box and the nature of your tasks - worker_concurrency = 16 - - # When you start an airflow worker, airflow starts a tiny web server - # subprocess to serve the workers local log files to the airflow main - # web server, who then builds pages and sends them to users. This defines - # the port on which the logs are served. It needs to be unused, and open - # visible from the main web server to connect into the workers. - worker_log_server_port = 8793 - - # The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally - # a sqlalchemy database. Refer to the Celery documentation for more - # information. - # http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings - broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow - - # The Celery result_backend. When a job finishes, it needs to update the - # metadata of the job. Therefore it will post a message on a message bus, - # or insert it into a database (depending of the backend) - # This status is used by the scheduler to update the state of the task - # The use of a database is highly recommended - # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings - result_backend = db+mysql://airflow:airflow@localhost:3306/airflow - - # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start - # it `airflow flower`. This defines the IP that Celery Flower runs on - flower_host = 0.0.0.0 - - # The root URL for Flower - # Ex: flower_url_prefix = /flower - flower_url_prefix = - - # This defines the port that Celery Flower runs on - flower_port = 5555 - - # Default queue that tasks get assigned to and that worker listen on. - default_queue = default - - # Import path for celery configuration options - celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG - - [celery_broker_transport_options] - # The visibility timeout defines the number of seconds to wait for the worker - # to acknowledge the task before the message is redelivered to another worker. - # Make sure to increase the visibility timeout to match the time of the longest - # ETA you're planning to use. Especially important in case of using Redis or SQS - visibility_timeout = 21600 - - # In case of using SSL - ssl_active = False - ssl_key = - ssl_cert = - ssl_cacert = - - [dask] - # This section only applies if you are using the DaskExecutor in - # [core] section above - - # The IP address and port of the Dask cluster's scheduler. - cluster_address = 127.0.0.1:8786 - # TLS/ SSL settings to access a secured Dask scheduler. 
- tls_ca = - tls_cert = - tls_key = - - [ldap] - # set this to ldaps://: - uri = - user_filter = objectClass=* - user_name_attr = uid - group_member_attr = memberOf - superuser_filter = - data_profiler_filter = - bind_user = cn=Manager,dc=example,dc=com - bind_password = insecure - basedn = dc=example,dc=com - cacert = /etc/ca/ldap_ca.crt - search_scope = LEVEL - - [mesos] - # Mesos master address which MesosExecutor will connect to. - master = localhost:5050 - - # The framework name which Airflow scheduler will register itself as on mesos - framework_name = Airflow - - # Number of cpu cores required for running one task instance using - # 'airflow run --local -p ' - # command on a mesos slave - task_cpu = 1 - - # Memory in MB required for running one task instance using - # 'airflow run --local -p ' - # command on a mesos slave - task_memory = 256 - - # Enable framework checkpointing for mesos - # See http://mesos.apache.org/documentation/latest/slave-recovery/ - checkpoint = False - - # Failover timeout in milliseconds. - # When checkpointing is enabled and this option is set, Mesos waits - # until the configured timeout for - # the MesosExecutor framework to re-register after a failover. Mesos - # shuts down running tasks if the - # MesosExecutor framework fails to re-register within this timeframe. - # failover_timeout = 604800 - - # Enable framework authentication for mesos - # See http://mesos.apache.org/documentation/latest/configuration/ - authenticate = False - - # Mesos credentials, if authentication is enabled - # default_principal = admin - # default_secret = admin - - # Optional Docker Image to run on slave before running the command - # This image should be accessible from mesos slave i.e mesos slave - # should be able to pull this docker image before executing the command. - # docker_image_slave = puckel/docker-airflow - - [kerberos] - ccache = /tmp/airflow_krb5_ccache - # gets augmented with fqdn - principal = airflow - reinit_frequency = 3600 - kinit_path = kinit - keytab = airflow.keytab - - [cli] - api_client = airflow.api.client.json_client - endpoint_url = http://localhost:8080 - - [api] - auth_backend = airflow.api.auth.backend.default - - [github_enterprise] - api_rev = v3 - - [admin] - # UI to hide sensitive variable fields when set to True - hide_sensitive_variable_fields = True - - [elasticsearch] - elasticsearch_host = diff --git a/scripts/ci/kubernetes/kube/deploy.sh b/scripts/ci/kubernetes/kube/deploy.sh deleted file mode 100755 index a9a42a7a12d12..0000000000000 --- a/scripts/ci/kubernetes/kube/deploy.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one * -# or more contributor license agreements. See the NOTICE file * -# distributed with this work for additional information * -# regarding copyright ownership. The ASF licenses this file * -# to you under the Apache License, Version 2.0 (the * -# "License"); you may not use this file except in compliance * -# with the License. You may obtain a copy of the License at * -# * -# http://www.apache.org/licenses/LICENSE-2.0 * -# * -# Unless required by applicable law or agreed to in writing, * -# software distributed under the License is distributed on an * -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * -# KIND, either express or implied. See the License for the * -# specific language governing permissions and limitations * -# under the License. 
* - -IMAGE=${1:-airflow/ci} -TAG=${2:-latest} -DIRNAME=$(cd "$(dirname "$0")"; pwd) - -kubectl delete -f $DIRNAME/postgres.yaml -kubectl delete -f $DIRNAME/airflow.yaml -kubectl delete -f $DIRNAME/secrets.yaml - -kubectl apply -f $DIRNAME/secrets.yaml -kubectl apply -f $DIRNAME/configmaps.yaml -kubectl apply -f $DIRNAME/postgres.yaml -kubectl apply -f $DIRNAME/volumes.yaml -kubectl apply -f $DIRNAME/airflow.yaml - -# wait for up to 10 minutes for everything to be deployed -for i in {1..150} -do - echo "------- Running kubectl get pods -------" - PODS=$(kubectl get pods | awk 'NR>1 {print $0}') - echo "$PODS" - NUM_AIRFLOW_READY=$(echo $PODS | grep airflow | awk '{print $2}' | grep -E '([0-9])\/(\1)' | wc -l | xargs) - NUM_POSTGRES_READY=$(echo $PODS | grep postgres | awk '{print $2}' | grep -E '([0-9])\/(\1)' | wc -l | xargs) - if [ "$NUM_AIRFLOW_READY" == "1" ] && [ "$NUM_POSTGRES_READY" == "1" ]; then - break - fi - sleep 4 -done - -POD=$(kubectl get pods -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep airflow | head -1) - -echo "------- pod description -------" -kubectl describe pod $POD -echo "------- webserver logs -------" -kubectl logs $POD webserver -echo "------- scheduler logs -------" -kubectl logs $POD scheduler -echo "--------------" diff --git a/scripts/ci/kubernetes/kube/postgres.yaml b/scripts/ci/kubernetes/kube/postgres.yaml deleted file mode 100644 index 1130921ee9d1b..0000000000000 --- a/scripts/ci/kubernetes/kube/postgres.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one * -# or more contributor license agreements. See the NOTICE file * -# distributed with this work for additional information * -# regarding copyright ownership. The ASF licenses this file * -# to you under the Apache License, Version 2.0 (the * -# "License"); you may not use this file except in compliance * -# with the License. You may obtain a copy of the License at * -# * -# http://www.apache.org/licenses/LICENSE-2.0 * -# * -# Unless required by applicable law or agreed to in writing, * -# software distributed under the License is distributed on an * -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * -# KIND, either express or implied. See the License for the * -# specific language governing permissions and limitations * -# under the License. 
* - -kind: Deployment -apiVersion: extensions/v1beta1 -metadata: - name: postgres-airflow -spec: - replicas: 1 - template: - metadata: - labels: - name: postgres-airflow - spec: - restartPolicy: Always - containers: - - name: postgres - image: postgres - imagePullPolicy: IfNotPresent - ports: - - containerPort: 5432 - protocol: TCP - volumeMounts: - - name: dbvol - mountPath: /var/lib/postgresql/data/pgdata - subPath: pgdata - env: - - name: POSTGRES_USER - value: root - - name: POSTGRES_PASSWORD - value: root - - name: POSTGRES_DB - value: airflow - - name: PGDATA - value: /var/lib/postgresql/data/pgdata - - name: POD_IP - valueFrom: { fieldRef: { fieldPath: status.podIP } } - livenessProbe: - initialDelaySeconds: 60 - timeoutSeconds: 5 - failureThreshold: 5 - exec: - command: - - /bin/sh - - -c - - exec pg_isready --host $POD_IP || if [[ $(psql -qtAc --host $POD_IP 'SELECT pg_is_in_recovery') != "f" ]]; then exit 0 else; exit 1; fi - readinessProbe: - initialDelaySeconds: 5 - timeoutSeconds: 5 - periodSeconds: 5 - exec: - command: - - /bin/sh - - -c - - exec pg_isready --host $POD_IP - resources: - requests: - memory: .5Gi - cpu: .5 - volumes: - - name: dbvol - emptyDir: {} ---- -apiVersion: v1 -kind: Service -metadata: - name: postgres-airflow -spec: - clusterIP: None - ports: - - port: 5432 - targetPort: 5432 - selector: - name: postgres-airflow diff --git a/scripts/ci/kubernetes/kube/secrets.yaml b/scripts/ci/kubernetes/kube/secrets.yaml deleted file mode 100644 index a93a0103e0f97..0000000000000 --- a/scripts/ci/kubernetes/kube/secrets.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one * -# or more contributor license agreements. See the NOTICE file * -# distributed with this work for additional information * -# regarding copyright ownership. The ASF licenses this file * -# to you under the Apache License, Version 2.0 (the * -# "License"); you may not use this file except in compliance * -# with the License. You may obtain a copy of the License at * -# * -# http://www.apache.org/licenses/LICENSE-2.0 * -# * -# Unless required by applicable law or agreed to in writing, * -# software distributed under the License is distributed on an * -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * -# KIND, either express or implied. See the License for the * -# specific language governing permissions and limitations * -# under the License. * -apiVersion: v1 -kind: Secret -metadata: - name: airflow-secrets -type: Opaque -data: - # The sql_alchemy_conn value is a base64 encoded representation of this connection string: - # postgresql+psycopg2://root:root@postgres-airflow:5432/airflow - sql_alchemy_conn: cG9zdGdyZXNxbCtwc3ljb3BnMjovL3Jvb3Q6cm9vdEBwb3N0Z3Jlcy1haXJmbG93OjU0MzIvYWlyZmxvdwo= diff --git a/scripts/ci/kubernetes/kube/volumes.yaml b/scripts/ci/kubernetes/kube/volumes.yaml deleted file mode 100644 index b5488e7c7a711..0000000000000 --- a/scripts/ci/kubernetes/kube/volumes.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one * -# or more contributor license agreements. See the NOTICE file * -# distributed with this work for additional information * -# regarding copyright ownership. The ASF licenses this file * -# to you under the Apache License, Version 2.0 (the * -# "License"); you may not use this file except in compliance * -# with the License. 
You may obtain a copy of the License at * -# * -# http://www.apache.org/licenses/LICENSE-2.0 * -# * -# Unless required by applicable law or agreed to in writing, * -# software distributed under the License is distributed on an * -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * -# KIND, either express or implied. See the License for the * -# specific language governing permissions and limitations * -# under the License. * - -# The backing volume can be anything you want, it just needs to be `ReadWriteOnce` -# I'm using hostPath since minikube is nice for testing, but any (non-local) volume will work on a real cluster -kind: PersistentVolume -apiVersion: v1 -metadata: - name: airflow-dags -spec: - accessModes: - - ReadWriteOnce - capacity: - storage: 2Gi - hostPath: - path: /airflow-dags/ ---- -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: airflow-dags -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 2Gi ---- -kind: PersistentVolume -apiVersion: v1 -metadata: - name: airflow-logs -spec: - accessModes: - - ReadWriteMany - capacity: - storage: 2Gi - hostPath: - path: /airflow-logs/ ---- -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: airflow-logs -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 2Gi ---- -kind: PersistentVolume -apiVersion: v1 -metadata: - name: test-volume -spec: - accessModes: - - ReadWriteOnce - capacity: - storage: 2Gi - hostPath: - path: /airflow-dags/ ---- -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: test-volume -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 2Gi From 5bd5a7bad638241c01ac1ddd0fe8f8c7a3e95d27 Mon Sep 17 00:00:00 2001 From: Jon Davies Date: Thu, 23 Aug 2018 15:37:41 +0100 Subject: [PATCH 2/2] [AIRFLOW-2947] Added base Helm chart. 
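
The chart templates the same KubernetesExecutor setup that patch 1 removes, with
PostgreSQL pulled in as a chart dependency. A minimal install sketch, assuming a
Helm 2 client with tiller already initialised in the target cluster and an
`airflow` image loadable by it (the release name and namespace below are arbitrary):

  cd scripts/ci/kubernetes/helm
  # fetch the pinned postgresql dependency from requirements.yaml
  helm dependency update airflow
  # render the templates locally as a sanity check before installing
  helm template --name airflow airflow
  # install into the current kubectl context
  helm install --name airflow --namespace airflow airflow
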
---
 .../ci/kubernetes/helm/airflow/.helmignore    |  21 +
 scripts/ci/kubernetes/helm/airflow/Chart.yaml |   5 +
 .../kubernetes/helm/airflow/requirements.yaml |   5 +
 .../helm/airflow/templates/NOTES.txt          |  19 +
 .../helm/airflow/templates/_helpers.tpl       |  42 ++
 .../helm/airflow/templates/configmaps.yaml    | 370 ++++++++++++++++++
 .../templates/deployment-scheduler.yaml       |  70 ++++
 .../airflow/templates/deployment-web.yaml     |  95 +++++
 .../helm/airflow/templates/ingress.yaml       |  38 ++
 .../airflow/templates/role-scheduler.yaml     |  21 +
 .../helm/airflow/templates/role-worker.yaml   |  21 +
 .../templates/rolebinding-scheduler.yaml      |  17 +
 .../airflow/templates/rolebinding-worker.yaml |  17 +
 .../helm/airflow/templates/secrets.yaml       |  31 ++
 .../helm/airflow/templates/service.yaml       |  19 +
 .../templates/serviceaccount-scheduler.yaml   |   9 +
 .../templates/serviceaccount-worker.yaml      |   9 +
 .../ci/kubernetes/helm/airflow/values.yaml    |  75 ++++
 18 files changed, 884 insertions(+)
 create mode 100644 scripts/ci/kubernetes/helm/airflow/.helmignore
 create mode 100644 scripts/ci/kubernetes/helm/airflow/Chart.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/requirements.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/NOTES.txt
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/_helpers.tpl
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/configmaps.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/deployment-scheduler.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/deployment-web.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/ingress.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/role-scheduler.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/role-worker.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/rolebinding-scheduler.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/rolebinding-worker.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/secrets.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/service.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/serviceaccount-scheduler.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/templates/serviceaccount-worker.yaml
 create mode 100644 scripts/ci/kubernetes/helm/airflow/values.yaml

diff --git a/scripts/ci/kubernetes/helm/airflow/.helmignore b/scripts/ci/kubernetes/helm/airflow/.helmignore
new file mode 100644
index 0000000000000..f0c1319444416
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/.helmignore
@@ -0,0 +1,21 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
diff --git a/scripts/ci/kubernetes/helm/airflow/Chart.yaml b/scripts/ci/kubernetes/helm/airflow/Chart.yaml
new file mode 100644
index 0000000000000..945f4dc6ebe07
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+description: A Helm chart for Apache Airflow
+name: airflow
+version: 0.1.0
+appVersion: 1.10.0
diff --git a/scripts/ci/kubernetes/helm/airflow/requirements.yaml b/scripts/ci/kubernetes/helm/airflow/requirements.yaml
new file mode 100644
index 0000000000000..d6e3f27c9754b
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/requirements.yaml
@@ -0,0 +1,5 @@
+dependencies:
+- name: postgresql
+  version: 0.15.0
+  repository: https://kubernetes-charts.storage.googleapis.com/
+  condition: postgresql.enabled
diff --git a/scripts/ci/kubernetes/helm/airflow/templates/NOTES.txt b/scripts/ci/kubernetes/helm/airflow/templates/NOTES.txt
new file mode 100644
index 0000000000000..a5e18c2b7c923
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/templates/NOTES.txt
@@ -0,0 +1,19 @@
+1. Get the application URL by running these commands:
+{{- if .Values.ingress.enabled }}
+{{- range .Values.ingress.hosts }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ . }}{{ $.Values.ingress.path }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "airflow.fullname" . }}-web)
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch the status of it by running 'kubectl get svc -w {{ template "airflow.fullname" . }}-web'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "airflow.fullname" . }}-web -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "airflow.name" . }}-web,release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  kubectl port-forward $POD_NAME 8080:8080
+{{- end }}
diff --git a/scripts/ci/kubernetes/helm/airflow/templates/_helpers.tpl b/scripts/ci/kubernetes/helm/airflow/templates/_helpers.tpl
new file mode 100644
index 0000000000000..bd35eac999028
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/templates/_helpers.tpl
@@ -0,0 +1,42 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "airflow.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}} +{{- define "airflow.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "airflow.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified postgresql name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "airflow.postgresql.fullname" -}} +{{- $name := default "postgresql" .Values.postgresql.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + diff --git a/scripts/ci/kubernetes/helm/airflow/templates/configmaps.yaml b/scripts/ci/kubernetes/helm/airflow/templates/configmaps.yaml new file mode 100644 index 0000000000000..40ca168f47d08 --- /dev/null +++ b/scripts/ci/kubernetes/helm/airflow/templates/configmaps.yaml @@ -0,0 +1,370 @@ +# Licensed to the Apache Software Foundation (ASF) under one * +# or more contributor license agreements. See the NOTICE file * +# distributed with this work for additional information * +# regarding copyright ownership. The ASF licenses this file * +# to you under the Apache License, Version 2.0 (the * +# "License"); you may not use this file except in compliance * +# with the License. You may obtain a copy of the License at * +# * +# http://www.apache.org/licenses/LICENSE-2.0 * +# * +# Unless required by applicable law or agreed to in writing, * +# software distributed under the License is distributed on an * +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * +# KIND, either express or implied. See the License for the * +# specific language governing permissions and limitations * +# under the License. * +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "airflow.fullname" . }}-config + labels: + app: {{ template "airflow.name" . }} + chart: {{ template "airflow.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + airflow.cfg: | + [core] + airflow_home = {{ .Values.airflow.core.airflow_home }} + dags_folder = {{ .Values.airflow.core.dags_folder }} + base_log_folder = {{ .Values.airflow.core.base_log_folder }} + logging_level = INFO + logging_config_class = {{ .Values.airflow.core.logging_config_class }} + executor = KubernetesExecutor + parallelism = 32 + load_examples = {{ default "True" .Values.airflow.core.load_examples }} + plugins_folder = {{ .Values.airflow.core.plugins_folder }} + sql_alchemy_conn = $SQL_ALCHEMY_CONN + encrypt_s3_logs = False + + [scheduler] + dag_dir_list_interval = 300 + child_process_log_directory = {{ .Values.airflow.scheduler.child_process_log_directory }} + # Task instances listen for external kill signal (when you clear tasks + # from the CLI or the UI), this defines the frequency at which they should + # listen (in seconds). + job_heartbeat_sec = 5 + max_threads = 2 + + # The scheduler constantly tries to trigger new tasks (look at the + # scheduler section in the docs for more information). This defines + # how often the scheduler should run (in seconds). 
+    scheduler_heartbeat_sec = 5
+
+    # after how much time should the scheduler terminate in seconds
+    # -1 indicates to run continuously (see also num_runs)
+    run_duration = -1
+
+    # after how much time new DAGs should be picked up from the filesystem
+    min_file_process_interval = 0
+
+    statsd_on = False
+    statsd_host = localhost
+    statsd_port = 8125
+    statsd_prefix = airflow
+
+    # How many seconds to wait between file-parsing loops to prevent the logs from being spammed.
+    min_file_parsing_loop_time = 1
+
+    print_stats_interval = 30
+    scheduler_zombie_task_threshold = 300
+    max_tis_per_query = 0
+    authenticate = False
+
+    # Turn off scheduler catchup by setting this to False.
+    # Default behavior is unchanged and
+    # Command Line Backfills still work, but the scheduler
+    # will not do scheduler catchup if this is False,
+    # however it can be set on a per DAG basis in the
+    # DAG definition (catchup)
+    catchup_by_default = True
+
+    [webserver]
+    # The base url of your website as airflow cannot guess what domain or
+    # cname you are using. This is used in automated emails that
+    # airflow sends to point links to the right web server
+    base_url = http://localhost:8080
+
+    # The ip specified when starting the web server
+    web_server_host = 0.0.0.0
+
+    # The port on which to run the web server
+    web_server_port = 8080
+
+    # Paths to the SSL certificate and key for the web server. When both are
+    # provided SSL will be enabled. This does not change the web server port.
+    web_server_ssl_cert =
+    web_server_ssl_key =
+
+    # Number of seconds the webserver waits before killing gunicorn master that doesn't respond
+    web_server_master_timeout = 120
+
+    # Number of seconds the gunicorn webserver waits before timing out on a worker
+    web_server_worker_timeout = 120
+
+    # Number of workers to refresh at a time. When set to 0, worker refresh is
+    # disabled. When nonzero, airflow periodically refreshes webserver workers by
+    # bringing up new ones and killing old ones.
+    worker_refresh_batch_size = 1
+
+    # Number of seconds to wait before refreshing a batch of workers.
+    worker_refresh_interval = 30
+
+    # Secret key used to run your flask app
+    secret_key = temporary_key
+
+    # Number of workers to run the Gunicorn web server
+    workers = 4
+
+    # The worker class gunicorn should use. Choices include
+    # sync (default), eventlet, gevent
+    worker_class = sync
+
+    # Log files for the gunicorn webserver. '-' means log to stderr.
+    access_logfile = -
+    error_logfile = -
+
+    # Expose the configuration file in the web server
+    expose_config = False
+
+    # Set to true to turn on authentication:
+    # https://airflow.incubator.apache.org/security.html#web-authentication
+    authenticate = False
+
+    # Filter the list of dags by owner name (requires authentication to be enabled)
+    filter_by_owner = False
+
+    # Filtering mode. Choices include user (default) and ldapgroup.
+    # Ldap group filtering requires using the ldap backend
+    #
+    # Note that the ldap server needs the "memberOf" overlay to be set up
+    # in order to use the ldapgroup mode.
+    owner_mode = user
+
+    # Default DAG view. Valid values are:
+    # tree, graph, duration, gantt, landing_times
+    dag_default_view = tree
+
+    # Default DAG orientation. Valid values are:
+    # LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top)
+    dag_orientation = LR
+
+    # Puts the webserver in demonstration mode; blurs the names of Operators for
+    # privacy.
+ demo_mode = False + + # The amount of time (in secs) webserver will wait for initial handshake + # while fetching logs from other worker machine + log_fetch_timeout_sec = 5 + + # By default, the webserver shows paused DAGs. Flip this to hide paused + # DAGs by default + hide_paused_dags_by_default = False + + # Consistent page size across all listing views in the UI + page_size = 100 + + # Use FAB-based webserver with RBAC feature + rbac = True + + [smtp] + # If you want airflow to send emails on retries, failure, and you want to use + # the airflow.utils.email.send_email_smtp function, you have to configure an + # smtp server here + smtp_host = localhost + smtp_starttls = True + smtp_ssl = False + # Uncomment and set the user/pass settings if you want to use SMTP AUTH + # smtp_user = airflow + # smtp_password = airflow + smtp_port = 25 + smtp_mail_from = airflow@example.com + + [kubernetes] + airflow_configmap = {{ template "airflow.fullname" . }}-config + worker_container_repository = {{ .Values.image.repository }} + worker_container_tag = {{ .Values.image.tag }} + worker_container_image_pull_policy = IfNotPresent + delete_worker_pods = {{ default "False" .Values.airflow.kubernetes.delete_worker_pods }} + git_repo = {{ .Values.airflow.kubernetes.git_repo }} + git_branch = {{ .Values.airflow.kubernetes.git_branch }} + git_subpath = {{ .Values.airflow.kubernetes.git_subpath }} + git_user = {{ .Values.airflow.kubernetes.git_user }} + git_password = {{ .Values.airflow.kubernetes.git_password }} + dags_volume_claim = {{ .Values.airflow.kubernetes.dags_volume_claim }} + dags_volume_subpath = {{ .Values.airflow.kubernetes.dags_volume_subpath }} + logs_volume_claim = {{ .Values.airflow.kubernetes.logs_volume_claim }} + logs_volume_subpath = {{ .Values.airflow.kubernetes.logs_volume_subpath }} + in_cluster = {{ default "True" .Values.airflow.kubernetes.in_cluster }} + namespace = {{ .Release.Namespace }} + gcp_service_account_keys = {{ .Values.airflow.kubernetes.gcp_service_account_keys }} + worker_service_account_name = {{ template "airflow.fullname" . }}-worker + + # For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync + git_sync_container_repository = gcr.io/google-containers/git-sync-amd64 + git_sync_container_tag = v2.0.6 + git_sync_init_container_name = git-sync-clone + + [kubernetes_secrets] + SQL_ALCHEMY_CONN = {{ template "airflow.fullname" . }}-secrets=sql_alchemy_conn + + [hive] + # Default mapreduce queue for HiveOperator tasks + default_hive_mapred_queue = + + [celery] + # This section only applies if you are using the CeleryExecutor in + # [core] section above + + # The app name that will be used by celery + celery_app_name = airflow.executors.celery_executor + + # The concurrency that will be used when starting workers with the + # "airflow worker" command. This defines the number of task instances that + # a worker will take, so size up your workers based on the resources on + # your worker box and the nature of your tasks + worker_concurrency = 16 + + # When you start an airflow worker, airflow starts a tiny web server + # subprocess to serve the workers local log files to the airflow main + # web server, who then builds pages and sends them to users. This defines + # the port on which the logs are served. It needs to be unused, and open + # visible from the main web server to connect into the workers. + worker_log_server_port = 8793 + + # The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally + # a sqlalchemy database. 
Refer to the Celery documentation for more + # information. + # http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings + broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow + + # The Celery result_backend. When a job finishes, it needs to update the + # metadata of the job. Therefore it will post a message on a message bus, + # or insert it into a database (depending of the backend) + # This status is used by the scheduler to update the state of the task + # The use of a database is highly recommended + # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings + result_backend = db+mysql://airflow:airflow@localhost:3306/airflow + + # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start + # it `airflow flower`. This defines the IP that Celery Flower runs on + flower_host = 0.0.0.0 + + # The root URL for Flower + # Ex: flower_url_prefix = /flower + flower_url_prefix = + + # This defines the port that Celery Flower runs on + flower_port = 5555 + + # Default queue that tasks get assigned to and that worker listen on. + default_queue = default + + # Import path for celery configuration options + celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG + + [celery_broker_transport_options] + # The visibility timeout defines the number of seconds to wait for the worker + # to acknowledge the task before the message is redelivered to another worker. + # Make sure to increase the visibility timeout to match the time of the longest + # ETA you're planning to use. Especially important in case of using Redis or SQS + visibility_timeout = 21600 + + # In case of using SSL + ssl_active = False + ssl_key = + ssl_cert = + ssl_cacert = + + [dask] + # This section only applies if you are using the DaskExecutor in + # [core] section above + + # The IP address and port of the Dask cluster's scheduler. + cluster_address = 127.0.0.1:8786 + # TLS/ SSL settings to access a secured Dask scheduler. + tls_ca = + tls_cert = + tls_key = + + [ldap] + # set this to ldaps://: + uri = + user_filter = objectClass=* + user_name_attr = uid + group_member_attr = memberOf + superuser_filter = + data_profiler_filter = + bind_user = cn=Manager,dc=example,dc=com + bind_password = insecure + basedn = dc=example,dc=com + cacert = /etc/ca/ldap_ca.crt + search_scope = LEVEL + + [mesos] + # Mesos master address which MesosExecutor will connect to. + master = localhost:5050 + + # The framework name which Airflow scheduler will register itself as on mesos + framework_name = Airflow + + # Number of cpu cores required for running one task instance using + # 'airflow run --local -p ' + # command on a mesos slave + task_cpu = 1 + + # Memory in MB required for running one task instance using + # 'airflow run --local -p ' + # command on a mesos slave + task_memory = 256 + + # Enable framework checkpointing for mesos + # See http://mesos.apache.org/documentation/latest/slave-recovery/ + checkpoint = False + + # Failover timeout in milliseconds. + # When checkpointing is enabled and this option is set, Mesos waits + # until the configured timeout for + # the MesosExecutor framework to re-register after a failover. Mesos + # shuts down running tasks if the + # MesosExecutor framework fails to re-register within this timeframe. 
+ # failover_timeout = 604800 + + # Enable framework authentication for mesos + # See http://mesos.apache.org/documentation/latest/configuration/ + authenticate = False + + # Mesos credentials, if authentication is enabled + # default_principal = admin + # default_secret = admin + + # Optional Docker Image to run on slave before running the command + # This image should be accessible from mesos slave i.e mesos slave + # should be able to pull this docker image before executing the command. + # docker_image_slave = puckel/docker-airflow + + [kerberos] + ccache = /tmp/airflow_krb5_ccache + # gets augmented with fqdn + principal = airflow + reinit_frequency = 3600 + kinit_path = kinit + keytab = airflow.keytab + + [cli] + api_client = airflow.api.client.json_client + endpoint_url = http://localhost:8080 + + [api] + auth_backend = airflow.api.auth.backend.default + + [github_enterprise] + api_rev = v3 + + [admin] + # UI to hide sensitive variable fields when set to True + hide_sensitive_variable_fields = True + + [elasticsearch] + elasticsearch_host = diff --git a/scripts/ci/kubernetes/helm/airflow/templates/deployment-scheduler.yaml b/scripts/ci/kubernetes/helm/airflow/templates/deployment-scheduler.yaml new file mode 100644 index 0000000000000..dada4cadfd14f --- /dev/null +++ b/scripts/ci/kubernetes/helm/airflow/templates/deployment-scheduler.yaml @@ -0,0 +1,70 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: {{ template "airflow.fullname" . }}-scheduler + labels: + app: {{ template "airflow.name" . }}-scheduler + chart: {{ template "airflow.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + app: {{ template "airflow.name" . }}-scheduler + release: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ template "airflow.name" . }}-scheduler + release: {{ .Release.Name }} + spec: + serviceAccountName: {{ template "airflow.fullname" . }}-scheduler + containers: + - name: {{ .Chart.Name }}-scheduler + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: ["scheduler"] + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POSTGRES_HOST + value: {{ template "airflow.postgresql.fullname" . }} + - name: POSTGRES_NAME + value: {{ .Values.postgresql.postgresDatabase }} + - name: POSTGRES_PASSWORD + value: {{ .Values.postgresql.postgresPassword }} + - name: POSTGRES_USERNAME + value: {{ .Values.postgresql.postgresUser }} + - name: SQL_ALCHEMY_CONN + value: postgresql+psycopg2://{{ .Values.postgresql.postgresUser }}:{{ .Values.postgresql.postgresPassword }}@{{ template "airflow.postgresql.fullname" . }}:5432/{{ .Values.postgresql.postgresDatabase }} + - name: TZ + value: Etc/UTC + volumeMounts: + - name: airflow-configmap + mountPath: /home/airflow/airflow.cfg + subPath: airflow.cfg + - name: airflow-logs + mountPath: /home/airflow/logs + resources: +{{ toYaml .Values.resources | indent 12 }} + volumes: + - name: airflow-configmap + configMap: + name: {{ template "airflow.fullname" . }}-config + - name: airflow-logs + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: +{{ toYaml . 
| indent 8 }}
+      {{- end }}
diff --git a/scripts/ci/kubernetes/helm/airflow/templates/deployment-web.yaml b/scripts/ci/kubernetes/helm/airflow/templates/deployment-web.yaml
new file mode 100644
index 0000000000000..b53b4651dc39e
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/templates/deployment-web.yaml
@@ -0,0 +1,95 @@
+apiVersion: apps/v1beta2
+kind: Deployment
+metadata:
+  name: {{ template "airflow.fullname" . }}-web
+  labels:
+    app: {{ template "airflow.name" . }}-web
+    chart: {{ template "airflow.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      app: {{ template "airflow.name" . }}-web
+      release: {{ .Release.Name }}
+  template:
+    metadata:
+      labels:
+        app: {{ template "airflow.name" . }}-web
+        release: {{ .Release.Name }}
+    spec:
+      initContainers:
+        - name: "init"
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - name: airflow-configmap
+              mountPath: /home/airflow/airflow.cfg
+              subPath: airflow.cfg
+          env:
+            - name: SQL_ALCHEMY_CONN
+              value: postgresql+psycopg2://{{ .Values.postgresql.postgresUser }}:{{ .Values.postgresql.postgresPassword }}@{{ template "airflow.postgresql.fullname" . }}:5432/{{ .Values.postgresql.postgresDatabase }}
+          command:
+            - "bash"
+          args:
+            - "-cx"
+            - "mkdir -pv /home/airflow/dags/ && /home/airflow/airflow-init.sh"
+      containers:
+        - name: {{ .Chart.Name }}-web
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          ports:
+            - name: webserver
+              containerPort: 8080
+              protocol: TCP
+          args: ["webserver"]
+          livenessProbe:
+            httpGet:
+              path: /login/
+              port: webserver
+          readinessProbe:
+            httpGet:
+              path: /login/
+              port: webserver
+          env:
+            - name: POD_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: POSTGRES_HOST
+              value: {{ template "airflow.postgresql.fullname" . }}
+            - name: POSTGRES_NAME
+              value: {{ .Values.postgresql.postgresDatabase }}
+            - name: POSTGRES_PASSWORD
+              value: {{ .Values.postgresql.postgresPassword }}
+            - name: POSTGRES_USERNAME
+              value: {{ .Values.postgresql.postgresUser }}
+            - name: SQL_ALCHEMY_CONN
+              value: postgresql+psycopg2://{{ .Values.postgresql.postgresUser }}:{{ .Values.postgresql.postgresPassword }}@{{ template "airflow.postgresql.fullname" . }}:5432/{{ .Values.postgresql.postgresDatabase }}
+            - name: TZ
+              value: Etc/UTC
+          volumeMounts:
+            - name: airflow-configmap
+              mountPath: /home/airflow/airflow.cfg
+              subPath: airflow.cfg
+          resources:
+{{ toYaml .Values.resources | indent 12 }}
+      volumes:
+        - name: airflow-configmap
+          configMap:
+            name: {{ template "airflow.fullname" . }}-config
+        - name: airflow-logs
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+{{ toYaml . | indent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+{{ toYaml . | indent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+{{ toYaml . | indent 8 }}
+      {{- end }}
diff --git a/scripts/ci/kubernetes/helm/airflow/templates/ingress.yaml b/scripts/ci/kubernetes/helm/airflow/templates/ingress.yaml
new file mode 100644
index 0000000000000..da6468da0418a
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/templates/ingress.yaml
@@ -0,0 +1,38 @@
+{{- if .Values.ingress.enabled -}}
+{{- $fullName := include "airflow.fullname" .
-}} +{{- $ingressPath := .Values.ingress.path -}} +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + app: {{ template "airflow.name" . }}-web + chart: {{ template "airflow.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- with .Values.ingress.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: +{{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} +{{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ . }} + http: + paths: + - path: {{ $ingressPath }} + backend: + serviceName: {{ $fullName }}-web + servicePort: webserver + {{- end }} +{{- end }} diff --git a/scripts/ci/kubernetes/helm/airflow/templates/role-scheduler.yaml b/scripts/ci/kubernetes/helm/airflow/templates/role-scheduler.yaml new file mode 100644 index 0000000000000..214b67b48f251 --- /dev/null +++ b/scripts/ci/kubernetes/helm/airflow/templates/role-scheduler.yaml @@ -0,0 +1,21 @@ +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "airflow.fullname" . }}-scheduler + labels: + app: {{ template "airflow.name" . }} + chart: {{ template "airflow.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +rules: + - apiGroups: + - "" + resources: + - pods + - pods/log + verbs: + - create + - delete + - get + - watch + - list diff --git a/scripts/ci/kubernetes/helm/airflow/templates/role-worker.yaml b/scripts/ci/kubernetes/helm/airflow/templates/role-worker.yaml new file mode 100644 index 0000000000000..56925df58aecc --- /dev/null +++ b/scripts/ci/kubernetes/helm/airflow/templates/role-worker.yaml @@ -0,0 +1,21 @@ +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "airflow.fullname" . }}-worker + labels: + app: {{ template "airflow.name" . }} + chart: {{ template "airflow.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +rules: + - apiGroups: + - "" + resources: + - pods + - pods/log + verbs: + - create + - delete + - get + - watch + - list diff --git a/scripts/ci/kubernetes/helm/airflow/templates/rolebinding-scheduler.yaml b/scripts/ci/kubernetes/helm/airflow/templates/rolebinding-scheduler.yaml new file mode 100644 index 0000000000000..801af985f2f83 --- /dev/null +++ b/scripts/ci/kubernetes/helm/airflow/templates/rolebinding-scheduler.yaml @@ -0,0 +1,17 @@ +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "airflow.fullname" . }}-scheduler + labels: + app: {{ template "airflow.name" . }} + chart: {{ template "airflow.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +subjects: + - kind: ServiceAccount + name: {{ template "airflow.fullname" . }}-scheduler + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "airflow.fullname" . }}-scheduler diff --git a/scripts/ci/kubernetes/helm/airflow/templates/rolebinding-worker.yaml b/scripts/ci/kubernetes/helm/airflow/templates/rolebinding-worker.yaml new file mode 100644 index 0000000000000..36424aa048d8d --- /dev/null +++ b/scripts/ci/kubernetes/helm/airflow/templates/rolebinding-worker.yaml @@ -0,0 +1,17 @@ +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "airflow.fullname" . }}-worker + labels: + app: {{ template "airflow.name" . 
}}
+    chart: {{ template "airflow.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ template "airflow.fullname" . }}-worker
+    namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: {{ template "airflow.fullname" . }}-worker
diff --git a/scripts/ci/kubernetes/helm/airflow/templates/secrets.yaml b/scripts/ci/kubernetes/helm/airflow/templates/secrets.yaml
new file mode 100644
index 0000000000000..903e6c495c015
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/templates/secrets.yaml
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one   *
+# or more contributor license agreements.  See the NOTICE file *
+# distributed with this work for additional information        *
+# regarding copyright ownership.  The ASF licenses this file   *
+# to you under the Apache License, Version 2.0 (the            *
+# "License"); you may not use this file except in compliance   *
+# with the License.  You may obtain a copy of the License at   *
+#                                                              *
+#   http://www.apache.org/licenses/LICENSE-2.0                 *
+#                                                              *
+# Unless required by applicable law or agreed to in writing,   *
+# software distributed under the License is distributed on an  *
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+# KIND, either express or implied.  See the License for the    *
+# specific language governing permissions and limitations      *
+# under the License.                                           *
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ template "airflow.fullname" . }}-secrets
+  labels:
+    app: {{ template "airflow.name" . }}
+    chart: {{ template "airflow.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+type: Opaque
+data:
+  # The sql_alchemy_conn value is a base64 encoded representation of this connection string:
+  # postgresql+psycopg2://root:root@postgres-airflow:5432/airflow
+  {{ $sqlString := printf "postgresql+psycopg2://%s:%s@%s:5432/%s" .Values.postgresql.postgresUser .Values.postgresql.postgresPassword (include "airflow.postgresql.fullname" .) .Values.postgresql.postgresDatabase }}
+  sql_alchemy_conn: {{ $sqlString | b64enc }}
diff --git a/scripts/ci/kubernetes/helm/airflow/templates/service.yaml b/scripts/ci/kubernetes/helm/airflow/templates/service.yaml
new file mode 100644
index 0000000000000..8c125ac4bbd29
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/templates/service.yaml
@@ -0,0 +1,19 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ template "airflow.fullname" . }}-web
+  labels:
+    app: {{ template "airflow.name" . }}-web
+    chart: {{ template "airflow.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: webserver
+      protocol: TCP
+      name: webserver
+  selector:
+    app: {{ template "airflow.name" . }}-web
+    release: {{ .Release.Name }}
diff --git a/scripts/ci/kubernetes/helm/airflow/templates/serviceaccount-scheduler.yaml b/scripts/ci/kubernetes/helm/airflow/templates/serviceaccount-scheduler.yaml
new file mode 100644
index 0000000000000..f8337441b1b15
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/templates/serviceaccount-scheduler.yaml
@@ -0,0 +1,9 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ template "airflow.fullname" . }}-scheduler
+  labels:
+    app: {{ template "airflow.name" . }}
+    chart: {{ template "airflow.chart" .
}}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
diff --git a/scripts/ci/kubernetes/helm/airflow/templates/serviceaccount-worker.yaml b/scripts/ci/kubernetes/helm/airflow/templates/serviceaccount-worker.yaml
new file mode 100644
index 0000000000000..632470a190e6e
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/templates/serviceaccount-worker.yaml
@@ -0,0 +1,9 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ template "airflow.fullname" . }}-worker
+  labels:
+    app: {{ template "airflow.name" . }}
+    chart: {{ template "airflow.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
diff --git a/scripts/ci/kubernetes/helm/airflow/values.yaml b/scripts/ci/kubernetes/helm/airflow/values.yaml
new file mode 100644
index 0000000000000..8c287230874d9
--- /dev/null
+++ b/scripts/ci/kubernetes/helm/airflow/values.yaml
@@ -0,0 +1,75 @@
+# Default values for airflow.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+image:
+  repository: airflow
+  tag: 1.10.0
+  pullPolicy: IfNotPresent
+
+service:
+  type: ClusterIP
+  port: 8080
+
+ingress:
+  enabled: false
+  annotations: {}
+    # kubernetes.io/ingress.class: nginx
+    # kubernetes.io/tls-acme: "true"
+  path: /
+  hosts:
+    - chart-example.local
+  tls: []
+  #  - secretName: chart-example-tls
+  #    hosts:
+  #      - chart-example.local
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi

+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+airflow:
+  core:
+    airflow_home: /home/airflow
+    base_log_folder: /home/airflow/logs
+    dags_folder: /home/airflow/dags
+    load_examples: True
+    logging_config_class:
+    plugins_folder: /home/airflow/plugins
+  scheduler:
+    child_process_log_directory: /home/airflow/logs/scheduler
+  kubernetes:
+    dags_volume_claim:
+    dags_volume_subpath:
+    delete_worker_pods: False
+    gcp_service_account_keys:
+    git_repo:
+    git_branch:
+    git_subpath:
+    git_user:
+    git_password:
+    in_cluster: True
+    logs_volume_claim:
+    logs_volume_subpath:
+
+postgresql:
+  enabled: true
+  postgresUser: airflow
+  postgresPassword: airflow
+  postgresDatabase: airflow
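
Note: the git-sync values above are empty by default, so for a quick smoke test they
need to be overridden; no chart edit is required. A sketch, assuming a Helm 2 client
(`my-values.yaml` is an illustrative name; the repo, branch and subpath values mirror
what the deleted configmaps.yaml hard-coded):

  # my-values.yaml -- any key from values.yaml can be overridden here
  airflow:
    kubernetes:
      git_repo: https://github.com/apache/incubator-airflow.git
      git_branch: master
      git_subpath: airflow/example_dags/

  helm upgrade --install airflow -f my-values.yaml scripts/ci/kubernetes/helm/airflow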