From b4bfa4d304d0d9e5125499adabfc451b2331219c Mon Sep 17 00:00:00 2001 From: Simon Li Date: Mon, 17 Jul 2017 14:43:31 +0100 Subject: [PATCH 01/30] Install django_prometheus on OMERO.web --- ansible/management-prometheus.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index fb246613..6ac00214 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -31,6 +31,28 @@ notify: - restart omero-server + - name: omero-web django prometheus install + become: yes + pip: + name: django_prometheus + state: present + version: 1.0.8 + virtualenv: "{{ omero_common_basedir }}/web/venv" + virtualenv_site_packages: yes + notify: + - restart omero-web + + - name: omero-web django prometheus configure + become: yes + copy: + content: | + config append -- omero.web.middleware '{"index":0, "class": "django_prometheus.middleware.PrometheusBeforeMiddleware"}' + config append -- omero.web.middleware '{"index":1000, "class": "django_prometheus.middleware.PrometheusAfterMiddleware"}' + config append -- omero.web.apps '"django_prometheus"' + dest: "{{ omero_common_basedir }}/web/config/django-prometheus.omero" + notify: + - restart omero-web + vars: # prometheus-jmx automatically creates this: jmx_javaagent: /opt/prometheus/jars/jmx_prometheus_javaagent.jar @@ -95,6 +117,13 @@ port: 9280 jobname: cadvisor-docker + - groupname: omero-web + groups: + - "{{ idr_environment | default('idr') + '-omero-hosts' }}" + port: 80 + jobname: django + metrics_path: /django_prometheus/metrics + prometheus_http_2xx_internal_targets: > {{ (groups[(idr_environment | default('idr')) + '-omero-hosts'] + From 0ec39f6e4e56f1ae9ba7d23c2d15ea6c948211a5 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Mon, 17 Jul 2017 15:12:59 +0100 Subject: [PATCH 02/30] Use dev version of openmicroscopy.prometheus --- ansible/requirements.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/ansible/requirements.yml b/ansible/requirements.yml index 041b87f0..a26ec60c 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -165,9 +165,10 @@ src: https://github.com/openmicroscopy/ansible-role-docker-slack-notifier/archive/0.0.2.tar.gz version: 0.0.2 +# TODO: Update and tag - name: openmicroscopy.prometheus - src: https://github.com/openmicroscopy/ansible-role-prometheus/archive/0.1.0.tar.gz - version: 0.1.1 + src: https://github.com/manics/ansible-role-prometheus/archive/metrics_path.tar.gz + version: metrics_path - name: openmicroscopy.prometheus-jmx src: https://github.com/openmicroscopy/ansible-role-prometheus-jmx/archive/0.1.0.tar.gz From 36d2fe13fe654c7e5e562a9312faee9239357ee0 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Mon, 17 Jul 2017 15:13:44 +0100 Subject: [PATCH 03/30] Use dev version of openmicroscopy.prometheus-jmx --- ansible/requirements.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ansible/requirements.yml b/ansible/requirements.yml index a26ec60c..62504c60 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -170,9 +170,10 @@ src: https://github.com/manics/ansible-role-prometheus/archive/metrics_path.tar.gz version: metrics_path +# TODO: Update and tag - name: openmicroscopy.prometheus-jmx - src: https://github.com/openmicroscopy/ansible-role-prometheus-jmx/archive/0.1.0.tar.gz - version: 0.1.0 + src: https://github.com/manics/ansible-role-prometheus-jmx/archive/jmx_prometheus_javaagent_head.tar.gz + version: jmx_prometheus_javaagent_head - name: openmicroscopy.prometheus-node src: https://github.com/openmicroscopy/ansible-role-prometheus-node/archive/0.1.1.tar.gz From 5f6498339854013e050ed87ba0df5ef9be5d483b Mon Sep 17 00:00:00 2001 From: Simon Li Date: Mon, 17 Jul 2017 15:14:29 +0100 Subject: [PATCH 04/30] Reenable jmx-exporter for OMERO.server Java servers --- ansible/management-prometheus.yml | 25 ++++++++++++++----------- 1 file changed, 14 
insertions(+), 11 deletions(-) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index 6ac00214..c0ccd07e 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -18,15 +18,15 @@ - role: openmicroscopy.omero-common tasks: - # TODO: Disabled due to https://github.com/prometheus/jmx_exporter/issues/156 + # TODO: Requires a custom build from HEAD - name: omero prometheus agent become: yes copy: content: | - # Disabled due to https://github.com/prometheus/jmx_exporter/issues/156 - #config set -- omero.jvmcfg.append.blitz "-javaagent:{{ jmx_javaagent }}=9180:/etc/prometheus/jmx-default-config.yml" - #config set -- omero.jvmcfg.append.indexer "-javaagent:{{ jmx_javaagent }}=9181:/etc/prometheus/jmx-default-config.yml" - #config set -- omero.jvmcfg.append.pixeldata "-javaagent:{{ jmx_javaagent }}=9182:/etc/prometheus/jmx-default-config.yml" + # Requires https://github.com/prometheus/jmx_exporter/pull/162 + config set -- omero.jvmcfg.append.blitz "-javaagent:{{ jmx_javaagent }}=9180:/etc/prometheus/jmx-default-config.yml" + config set -- omero.jvmcfg.append.indexer "-javaagent:{{ jmx_javaagent }}=9181:/etc/prometheus/jmx-default-config.yml" + config set -- omero.jvmcfg.append.pixeldata "-javaagent:{{ jmx_javaagent }}=9182:/etc/prometheus/jmx-default-config.yml" dest: "{{ omero_common_basedir }}/server/config/prometheus.omero" notify: - restart omero-server @@ -92,21 +92,24 @@ jobname: node-exporter # TODO: Disabled due to https://github.com/prometheus/jmx_exporter/issues/156 +# Currently testing using a custom build from HEAD +# https://github.com/prometheus/jmx_exporter/pull/162 +# https://github.com/prometheus/jmx_exporter/commit/5b180affbefa445088d4608b9640ca404ce91ae2 - groupname: blitz - # groups: - # - "{{ idr_environment | default('idr') + '-omero-hosts' }}" + groups: + - "{{ idr_environment | default('idr') + '-omero-hosts' }}" port: 9180 jobname: jmx-blitz - groupname: indexer - # groups: - # - "{{ 
idr_environment | default('idr') + '-omero-hosts' }}" + groups: + - "{{ idr_environment | default('idr') + '-omero-hosts' }}" port: 9181 jobname: jmx-indexer - groupname: pixeldata - # groups: - # - "{{ idr_environment | default('idr') + '-omero-hosts' }}" + groups: + - "{{ idr_environment | default('idr') + '-omero-hosts' }}" port: 9182 jobname: jmx-pixeldata From 62a3bf868d3fd58da79676bc960f86222eb52596 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 17:44:14 +0100 Subject: [PATCH 05/30] Block /django_prometheus on the front-end --- ansible/group_vars/proxy-hosts.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/group_vars/proxy-hosts.yml b/ansible/group_vars/proxy-hosts.yml index 780546b3..2ee00198 100644 --- a/ansible/group_vars/proxy-hosts.yml +++ b/ansible/group_vars/proxy-hosts.yml @@ -271,7 +271,8 @@ nginx_proxy_sites: "{{ _nginx_proxy_sites + (idr_proxy_additional_sites | defaul ###################################################################### # Other -#nginx_proxy_block_locations: +nginx_proxy_block_locations: +- "^~ /django_prometheus" #- "^~ /login" #nginx_proxy_set_header_host: 'idr.openmicroscopy.org' From 0c5ed87a947b502e269e15dc806e2e9fbd603c9b Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 17:49:37 +0100 Subject: [PATCH 06/30] Use a fork of django-prometheus with multiprocessing enabled --- ansible/management-prometheus.yml | 36 +++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index c0ccd07e..4ac48470 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -34,9 +34,10 @@ - name: omero-web django prometheus install become: yes pip: - name: django_prometheus + #name: django_prometheus + name: https://github.com/manics/django-prometheus/archive/1.0.9-ome1.zip state: present - version: 1.0.8 + #version: v1.0.8 virtualenv: "{{ 
omero_common_basedir }}/web/venv" virtualenv_site_packages: yes notify: @@ -49,10 +50,41 @@ config append -- omero.web.middleware '{"index":0, "class": "django_prometheus.middleware.PrometheusBeforeMiddleware"}' config append -- omero.web.middleware '{"index":1000, "class": "django_prometheus.middleware.PrometheusAfterMiddleware"}' config append -- omero.web.apps '"django_prometheus"' + + config set -- omero.web.wsgi_args '--config file:/opt/omero/web/config/gunicorn-config.py' dest: "{{ omero_common_basedir }}/web/config/django-prometheus.omero" notify: - restart omero-web + - name: omero-web gunicorn prometheus configure + become: yes + copy: + content: | + from prometheus_client import multiprocess + def child_exit(server, worker): + multiprocess.mark_process_dead(worker.pid) + dest: "{{ omero_common_basedir }}/web/config/gunicorn-config.py" + notify: + - restart omero-web + + - name: omero-web service.d directory + become: yes + file: + path: /etc/systemd/system/omero-web.service.d + state: directory + + - name: omero-web service.d prometheus + become: yes + copy: + content: | + [Service] + Environment="prometheus_multiproc_dir=/opt/omero/web/OMERO.web/var/prometheus" + ExecStartPre=/bin/sh -c '/usr/bin/rm -rf "$prometheus_multiproc_dir" && \ + /usr/bin/mkdir -p "$prometheus_multiproc_dir"' + dest: /etc/systemd/system/omero-web.service.d/prometheus.conf + notify: + - restart omero-web + vars: # prometheus-jmx automatically creates this: jmx_javaagent: /opt/prometheus/jars/jmx_prometheus_javaagent.jar From b62973cb6d3b406f23cf71f0e4626847853a1b26 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 17:54:16 +0100 Subject: [PATCH 07/30] Setup a small util for monitoring omero sessions --- ansible/management-prometheus.yml | 71 +++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index 4ac48470..9f3b3cc8 100644 --- a/ansible/management-prometheus.yml +++ 
b/ansible/management-prometheus.yml @@ -90,6 +90,71 @@ jmx_javaagent: /opt/prometheus/jars/jmx_prometheus_javaagent.jar +# prometheus-omero-py session monitoring +# TODO: Consider moving this into a role +- hosts: "{{ idr_environment | default('idr') }}-omero-hosts" + + roles: + - role: openmicroscopy.versioncontrol-utils + + tasks: + + - name: prometheus-omero-py | install prometheus-omero-py + become: yes + git: + dest: /opt/prometheus-omero-py/src + force: yes + repo: https://gist.github.com/238a63cfd9c2fb8a450252d79e609296.git + version: HEAD + notify: + - restart prometheus-omero-py + + - name: prometheus-omero-py | setup virtualenv + become: yes + pip: + requirements: /opt/prometheus-omero-py/src/requirements.txt + state: present + virtualenv: /opt/prometheus-omero-py/venv + virtualenv_site_packages: yes + notify: + - restart prometheus-omero-py + + - name: prometheus-omero-py | systemd service + become: yes + copy: + dest: /etc/systemd/system/prometheus-omero-py.service + src: /opt/prometheus-omero-py/src/prometheus-omero-py-metrics.service + remote_src: True + notify: + - restart prometheus-omero-py + + - name: prometheus-omero-py | options + become: yes + copy: + content: > + OPTIONS="--host localhost --listen 9171 --verbose --interval 15" + dest: /etc/sysconfig/prometheus-omero-py + notify: + - restart prometheus-omero-py + + - name: prometheus-omero-py | enable service + become: yes + systemd: + daemon_reload: yes + enabled: yes + name: prometheus-omero-py + state: started + + handlers: + - name: restart prometheus-omero-py + become: yes + systemd: + daemon_reload: yes + enabled: yes + name: prometheus-omero-py + state: restarted + + - hosts: > {{ idr_environment | default('idr') }}-dockermanager-hosts {{ idr_environment | default('idr') }}-dockerworker-hosts @@ -159,6 +224,12 @@ jobname: django metrics_path: /django_prometheus/metrics + - groupname: omero-sessions + groups: + - "{{ idr_environment | default('idr') + '-omero-hosts' }}" + port: 9171 + 
jobname: omero-sessions + prometheus_http_2xx_internal_targets: > {{ (groups[(idr_environment | default('idr')) + '-omero-hosts'] + From 2e255689064628843a82a1ae431993b9d0a5672f Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 17:56:41 +0100 Subject: [PATCH 08/30] Install grafana --- ansible/management-prometheus.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index 9f3b3cc8..cbe9fe96 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -249,3 +249,22 @@ prometheus_rsync_banner_targets: - idr.openmicroscopy.org:873 + + +# Grafana +- hosts: "{{ idr_environment | default('idr') }}-management-hosts" + + tasks: + + - name: run grafana in docker + become: yes + docker_container: + image: grafana/grafana + links: + - prometheus:prometheus + name: grafana + published_ports: + - "3000:3000" + state: started + volumes: + - /data/grafana:/var/lib/grafana From 4e3ba94452f45cc5fc19df7cf04b872ab8b019c2 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 17:59:30 +0100 Subject: [PATCH 09/30] Use independent redis for each OMERO.web --- ansible/idr-omero.yml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/ansible/idr-omero.yml b/ansible/idr-omero.yml index c5cf50ee..e0622c93 100644 --- a/ansible/idr-omero.yml +++ b/ansible/idr-omero.yml @@ -58,11 +58,6 @@ - role: openmicroscopy.omero-server - # Use a single redis server for shared web sessions - # TODO: Add omeroweb hosts group - - role: openmicroscopy.redis - redis_listen: 0.0.0.0 - environment: "{{ idr_ANSIBLE_ENVIRONMENT_VARIABLES | default({}) }}" @@ -70,18 +65,8 @@ - hosts: > {{ idr_environment | default('idr') }}-omero-hosts - pre_tasks: - - name: Get redis IP - set_fact: - omero_redis_host_ansible: >- - {{ - hostvars[groups[ - idr_environment | default('idr') + '-omeroreadwrite-hosts'][0]] - ['ansible_' + (idr_net_iface | 
default('eth0'))]['ipv4']['address'] - }} - when: "{{ groups[idr_environment | default('idr') + '-omeroreadwrite-hosts'] is defined }}" - roles: + - role: openmicroscopy.redis - role: openmicroscopy.omero-web - role: openmicroscopy.omero-web-apps From 1fc3b401b6712d9d543eb2b278f2c23510a066e7 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 18:00:52 +0100 Subject: [PATCH 10/30] Send omero-logmonitor to separate slack per idr-env --- ansible/management.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/management.yml b/ansible/management.yml index 0c145929..da75a8b7 100644 --- a/ansible/management.yml +++ b/ansible/management.yml @@ -81,7 +81,7 @@ roles: - role: openmicroscopy.omero-logmonitor omero_logmonitor_slack_token: "{{ idr_secret_omero_logmonitor_slack_token | default(None) }}" - omero_logmonitor_slack_channel: "#idr-logs" + omero_logmonitor_slack_channel: idr-logs-{{ idr_environment | default('idr') }} # Docker slack notifications From 23e6b82991cdec9d571b1e5693bde65c27002700 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 19:39:26 +0100 Subject: [PATCH 11/30] Disable prometheus monitoring of indexer pixeldata Currently broken in the metadata53 branch --- ansible/management-prometheus.yml | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index cbe9fe96..7bc1dfa7 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -198,17 +198,20 @@ port: 9180 jobname: jmx-blitz - - groupname: indexer - groups: - - "{{ idr_environment | default('idr') + '-omero-hosts' }}" - port: 9181 - jobname: jmx-indexer - - - groupname: pixeldata - groups: - - "{{ idr_environment | default('idr') + '-omero-hosts' }}" - port: 9182 - jobname: jmx-pixeldata + # TODO: indexer and pixeldata are currently broken (fail to start)on + # metadata53 due to the readonly work. 
These two targets should be + # reenabled when they are fixed: + # - groupname: indexer + # groups: + # - "{{ idr_environment | default('idr') + '-omero-hosts' }}" + # port: 9181 + # jobname: jmx-indexer + # + # - groupname: pixeldata + # groups: + # - "{{ idr_environment | default('idr') + '-omero-hosts' }}" + # port: 9182 + # jobname: jmx-pixeldata - groupname: cadvisor groups: From 6224d8f80ca0fd7f1d1c77db9fe95c3524325f79 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 19:41:30 +0100 Subject: [PATCH 12/30] Use a shared /data/idr-metadata directory (empty by default) --- ansible/idr-omero-readonly.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ansible/idr-omero-readonly.yml b/ansible/idr-omero-readonly.yml index 6020a0b4..97c4bcbb 100644 --- a/ansible/idr-omero-readonly.yml +++ b/ansible/idr-omero-readonly.yml @@ -7,6 +7,12 @@ # OMERO read-write fileserver for OMERO read-only - hosts: "{{ idr_environment | default('idr') }}-omeroreadwrite-hosts" + pre_tasks: + - name: Create idr-metadata directory + file: + path: /data/idr-metadata + state: directory + roles: - role: openmicroscopy.nfs-share @@ -18,6 +24,9 @@ # TODO: Limit which hosts can write to this dir - host: "*" options: 'rw' + /data/idr-metadata: + - host: "*" + options: 'ro' # Include restart handlers - role: openmicroscopy.omero-common @@ -53,6 +62,9 @@ - path: /data/BioFormatsCache location: "{{ omero_fileserver_host_ansible }}:/data/BioFormatsCache" opts: rw,sync + - path: /data/idr-metadata + location: "{{ omero_fileserver_host_ansible }}:/data/idr-metadata" + opts: ro # Include restart handlers - role: openmicroscopy.omero-common From 695d990db379152592358912fc82c8d01330cc84 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 20:36:13 +0100 Subject: [PATCH 13/30] Use devel version of nginx-proxy role --- ansible/requirements.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ansible/requirements.yml b/ansible/requirements.yml 
index 62504c60..caf63743 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -66,8 +66,11 @@ - src: openmicroscopy.nginx version: 1.0.0 -- src: openmicroscopy.nginx-proxy - version: 1.3.0 +#- src: openmicroscopy.nginx-proxy +# version: 1.3.0 +- name: openmicroscopy.nginx-proxy + src: https://github.com/manics/ansible-role-nginx-proxy/archive/nginx_proxy_block_locations.tar.gz + verison: nginx_proxy_block_locations - src: openmicroscopy.nginx-ssl-selfsigned version: 1.0.0 From aaa008bf10dbf60fe757c8f06a015c0206aed5a4 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 20 Jul 2017 20:50:25 +0100 Subject: [PATCH 14/30] Allow pinning of kubeadm version --- ansible/roles/kubernetes/defaults/main.yml | 2 ++ ansible/roles/kubernetes/tasks/common.yml | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ansible/roles/kubernetes/defaults/main.yml b/ansible/roles/kubernetes/defaults/main.yml index 9afdafc0..31a9ef62 100644 --- a/ansible/roles/kubernetes/defaults/main.yml +++ b/ansible/roles/kubernetes/defaults/main.yml @@ -23,3 +23,5 @@ kubernetes_init_args: >- , '') }} kubernetes_join_args: --token {{ kubernetes_token }} {{ kubernetes_master }} + +kubernetes_kube_version: '' diff --git a/ansible/roles/kubernetes/tasks/common.yml b/ansible/roles/kubernetes/tasks/common.yml index e25065fe..44abaadb 100644 --- a/ansible/roles/kubernetes/tasks/common.yml +++ b/ansible/roles/kubernetes/tasks/common.yml @@ -46,9 +46,9 @@ become: yes yum: name: - - kubeadm - - kubectl - - kubelet + - kubeadm{{ (kubernetes_kube_version | length > 0) | ternary('-', '') + kubernetes_kube_version }} + - kubectl{{ (kubernetes_kube_version | length > 0) | ternary('-', '') + kubernetes_kube_version }} + - kubelet{{ (kubernetes_kube_version | length > 0) | ternary('-', '') + kubernetes_kube_version }} - kubernetes-cni state: present From 4446952837c2ba7ee0a6bf5d88bb5a69098640e2 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Fri, 21 Jul 2017 10:25:20 +0100 Subject: 
[PATCH 15/30] Pin kubernetes/kubeadm version --- ansible/group_vars/dockermanager-hosts.yml | 2 ++ ansible/group_vars/dockerworker-hosts.yml | 2 ++ ansible/idr-kubernetes.yml | 2 ++ 3 files changed, 6 insertions(+) diff --git a/ansible/group_vars/dockermanager-hosts.yml b/ansible/group_vars/dockermanager-hosts.yml index da25c473..eae958e9 100644 --- a/ansible/group_vars/dockermanager-hosts.yml +++ b/ansible/group_vars/dockermanager-hosts.yml @@ -1,5 +1,7 @@ docker_use_ipv4_nic_mtu: True +kubernetes_kube_version: 1.7.0 + idr_jupyter_hub_image: "imagedata/jupyterhub-githubauth:0.4.0" idr_jupyter_notebook_image: "imagedata/jupyter-docker:develop" idr_jupyter_notebook_repo: https://github.com/IDR/idr-notebooks.git diff --git a/ansible/group_vars/dockerworker-hosts.yml b/ansible/group_vars/dockerworker-hosts.yml index 8c098f3d..320f0541 100644 --- a/ansible/group_vars/dockerworker-hosts.yml +++ b/ansible/group_vars/dockerworker-hosts.yml @@ -1,4 +1,6 @@ docker_use_ipv4_nic_mtu: True +kubernetes_kube_version: 1.7.0 + idr_jupyter_hub_image: "imagedata/jupyterhub-githubauth:0.4.0" idr_jupyter_notebook_image: "imagedata/jupyter-docker:develop" diff --git a/ansible/idr-kubernetes.yml b/ansible/idr-kubernetes.yml index 2d96b863..3fd50b6f 100644 --- a/ansible/idr-kubernetes.yml +++ b/ansible/idr-kubernetes.yml @@ -1,3 +1,5 @@ +# Setup a kubernetes cluster +# idr-docker.yml must be run first --- - hosts: > From 9f427a8ad1407374389f57b8389598cc63ddb506 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Fri, 21 Jul 2017 11:04:42 +0100 Subject: [PATCH 16/30] Update delete script with omero cluster and flat net --- scripts/os-idr-delete.sh | 47 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/scripts/os-idr-delete.sh b/scripts/os-idr-delete.sh index 3eace811..2d3b9a2c 100755 --- a/scripts/os-idr-delete.sh +++ b/scripts/os-idr-delete.sh @@ -6,42 +6,41 @@ set -eu idr_environment=$1 -for server in proxy omero database management; do - 
echo openstack server delete ${idr_environment}-${server} -done -echo -for server in omero database dockermanager dockerworker; do - echo openstack server delete ${idr_environment}-a-${server} +for server in \ + proxy \ + omeroreadonly-1 \ + omeroreadonly-2 \ + omeroreadwrite \ + database \ + dockermanager \ + dockerworker-1 \ + dockerworker-2 \ + management \ + ; do + echo openstack server delete ${idr_environment}-${server} done echo -for volume in proxy-nginxcache omero-data database-db; do +for volume in \ + proxy-nginxcache \ + omeroreadwrite-data \ + database-db \ + dockermanager-data \ + ; do echo openstack volume delete ${idr_environment}-${volume} done echo -for volume in omero-data database-db dockermanager-data; do - echo openstack volume delete ${idr_environment}-a-${volume} -done -echo - -for router in ${idr_environment}-router ${idr_environment}-a-router; do - echo "for port in \$(openstack port list --router ${router} -f value | cut -d\ -f1); do" - echo " openstack router remove port ${router} \$port" - echo "done" - echo -done +echo "for port in \$(openstack port list --router ${idr_environment}-router -f value | cut -d\ -f1); do" +echo " openstack router remove port ${idr_environment}-router \$port" +echo "done" echo -for router in ${idr_environment}-router ${idr_environment}-a-router; do - echo openstack router delete ${router} -done +echo openstack router delete ${idr_environment}-router echo -for network in ${idr_environment} ${idr_environment}-a; do - echo openstack network delete ${network} -done +echo openstack network delete ${idr_environment} echo echo openstack server list -f yaml From 939187ac35e8c19873f83ead83e1c566b78d6737 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Mon, 24 Jul 2017 10:55:24 +0100 Subject: [PATCH 17/30] Use env specific idr-logs and idr-notify slack channels --- ansible/management.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/management.yml b/ansible/management.yml index 
da75a8b7..3158b320 100644 --- a/ansible/management.yml +++ b/ansible/management.yml @@ -12,7 +12,7 @@ - role: openmicroscopy.basedeps # munin requires EPEL - role: openmicroscopy.munin munin_slack_token: "{{ idr_secret_management_slack_token | default(None) }}" - munin_slack_channel: "#idr-notify" + munin_slack_channel: "#idr-notify-{{ idr_environment | default('idr') }}" munin_slack_username: "{{ idr_environment | default('idr') }} Munin Notification" munin_slack_emoji: ":pony:" munin_slack_url: http://{{ ansible_host }}/munin/problems.html @@ -91,7 +91,7 @@ roles: - role: openmicroscopy.docker-slack-notifier - docker_slack_notifier_channel: "#idr-deployment" + docker_slack_notifier_channel: "#idr-logs-{{ idr_environment | default('idr') }}" docker_slack_notifier_username: "{{ ansible_hostname }}" docker_slack_notifier_icon: ":docker:" docker_slack_notifier_token: "{{ idr_secret_management_slack_webhook | default(None) }}" From 0b3f857b326c79425d1a077dfb49655370801109 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Mon, 24 Jul 2017 11:08:37 +0100 Subject: [PATCH 18/30] Add copy of current grafana dashboards --- .../grafana-dashboards/idr-per-server.json | 195 +++++++ ansible/grafana-dashboards/idr-sessions.json | 471 +++++++++++++++++ ansible/grafana-dashboards/idr-vertical.json | 498 ++++++++++++++++++ 3 files changed, 1164 insertions(+) create mode 100644 ansible/grafana-dashboards/idr-per-server.json create mode 100644 ansible/grafana-dashboards/idr-sessions.json create mode 100644 ansible/grafana-dashboards/idr-vertical.json diff --git a/ansible/grafana-dashboards/idr-per-server.json b/ansible/grafana-dashboards/idr-per-server.json new file mode 100644 index 00000000..71bcec43 --- /dev/null +++ b/ansible/grafana-dashboards/idr-per-server.json @@ -0,0 +1,195 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + 
{ + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.4.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "id": 1, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "hostname", + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(1 - node_filesystem_free{fstype!~\"(nfs|nfs4|overlay|rootfs|rpc_pipefs|tmpfs)\", instance=\"$hostname\"} / node_filesystem_size{fstype!~\"(nfs|nfs4|overlay|rootfs|rpc_pipefs|tmpfs)\", instance=\"$hostname\"}) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{mountpoint}} ({{device}})", + "metric": "", + "refId": "A", + "step": 120 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$hostname disk", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "Used space", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Hostname", + "multi": true, + "name": "hostname", + "options": [], + "query": "label_values(node_exporter_build_info, instance)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "IDR per server", + "version": 2 +} \ No newline at end of file diff --git a/ansible/grafana-dashboards/idr-sessions.json b/ansible/grafana-dashboards/idr-sessions.json new file mode 100644 index 00000000..c10b49ba --- /dev/null +++ b/ansible/grafana-dashboards/idr-sessions.json @@ -0,0 +1,471 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.4.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "5s", + "rows": [ + { + 
"collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "hostname", + "seriesOverrides": [ + { + "alias": "Number of requests", + "dashes": true, + "legend": false, + "lines": false, + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket{instance=\"$hostname\"}[$quantileint])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50", + "refId": "A", + "step": 4 + }, + { + "expr": "histogram_quantile(0.90, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket{instance=\"$hostname\"}[$quantileint])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "90", + "refId": "B", + "step": 4 + }, + { + "expr": "histogram_quantile(0.95, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket{instance=\"$hostname\"}[$quantileint])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "95", + "refId": "C", + "step": 4 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket{instance=\"$hostname\"}[$quantileint])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99", + "refId": "D", + "step": 4 + }, + { + "expr": "histogram_quantile(0.999, 
sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket{instance=\"$hostname\"}[$quantileint])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99.9", + "refId": "E", + "step": 4 + }, + { + "expr": "rate(django_http_requests_latency_including_middlewares_seconds_count{instance=\"$hostname\"}[$quantileint])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Number of requests", + "refId": "F", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Web latency ($hostname)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "Latency", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "none", + "label": "Requests /s", + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "id": 1, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "hostname", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "omero_sessions_active{instance=\"$hostname\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": 
"{{username}}", + "metric": "omero_sessions_active", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OMERO.server sessions ($hostname)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(django_http_responses_total_by_status[$quantileint])) without (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Django request status", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + 
"label": "Requests /s", + "logBase": 10, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Hostname", + "multi": true, + "name": "hostname", + "options": [], + "query": "label_values(node_exporter_build_info, instance)", + "refresh": 1, + "regex": "/(.*omero.*)/", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "2m", + "value": "2m" + }, + "hide": 0, + "label": "Quantile interval", + "name": "quantileint", + "options": [ + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": true, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "1m,2m,5m,15m,30m,1h", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "IDR sessions", + "version": 8 +} \ No newline at end of file diff --git a/ansible/grafana-dashboards/idr-vertical.json b/ansible/grafana-dashboards/idr-vertical.json 
new file mode 100644 index 00000000..ceac8dd8 --- /dev/null +++ b/ansible/grafana-dashboards/idr-vertical.json @@ -0,0 +1,498 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.4.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": false, + "rows": [ + { + "collapse": false, + "height": 259, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "CPU usage summed over all CPUs", + "fill": 1, + "id": 1, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(100 - (avg by (instance) (irate(node_cpu{mode=\"idle\", instance=~\".*$servergroup.*\"}[5m])) * 100)) * on(instance) (count(node_cpu{mode=\"idle\", instance=~\".*$servergroup.*\"}) without (cpu, mode))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$servergroup CPU", + "tooltip": { + "shared": true, +
"sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "CPU (Sum)", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*[^%]$/", + "linewidth": 3 + }, + { + "alias": "/.*%$/", + "dashes": true, + "legend": false, + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Active{instance=~\".*$servergroup.*\"} / 1024 / 1024 / 1024", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 600 + }, + { + "expr": "node_memory_Active{instance=~\".*$servergroup.*\"} / node_memory_MemTotal{instance=~\".*$servergroup.*\"} * 100", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}} %", + "refId": "C", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$servergroup memory", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, +
"yaxes": [ + { + "format": "short", + "label": "Active memory GB", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "Active memory %", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=~\".*$servergroup.*\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$servergroup load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Load 1", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": "servergroup", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 180, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Network IO (+ve=receive, -ve=transmit)", + 
"fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "servergroup", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\".*$servergroup.*\"}[1m])) by (instance) / 1024 / 1024", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}} +", + "metric": "", + "refId": "A", + "step": 600 + }, + { + "expr": "- sum (irate(node_network_transmit_bytes{instance=~\".*$servergroup.*\"}[1m])) by (instance) / 1024 / 1024", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}} -", + "metric": "", + "refId": "B", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$servergroup Network", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "mbytes", + "label": "Network +rx -tx", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "tags": [], + "text": "omero + database + docker", + "value": [ + "omero", + "database", + "docker" + ] + }, + 
"hide": 0, + "includeAll": false, + "label": "Servers", + "multi": true, + "name": "servergroup", + "options": [ + { + "selected": true, + "text": "omero", + "value": "omero" + }, + { + "selected": true, + "text": "database", + "value": "database" + }, + { + "selected": true, + "text": "docker", + "value": "docker" + } + ], + "query": "omero, database, docker", + "type": "custom" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "IDR vertical", + "version": 2 +} \ No newline at end of file From f885a294e15c74a8a5c41b08c5c78bf93e5b1fb2 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Mon, 24 Jul 2017 12:05:37 +0100 Subject: [PATCH 19/30] Attempt to setup grafana dashboards using grafana API --- ansible/management-grafana.yml | 147 ++++++++++++++++++++++++++++++ ansible/management-prometheus.yml | 19 ---- 2 files changed, 147 insertions(+), 19 deletions(-) create mode 100644 ansible/management-grafana.yml diff --git a/ansible/management-grafana.yml b/ansible/management-grafana.yml new file mode 100644 index 00000000..bdbfd945 --- /dev/null +++ b/ansible/management-grafana.yml @@ -0,0 +1,147 @@ +# Setup grafana docker, user and dashboard +# Assumes there is already a local docker container called prometheus +# as a datasource + +- hosts: "{{ idr_environment | default('idr') }}-management-hosts" + + tasks: + + - name: Run docker grafana + become: yes + docker_container: + image: grafana/grafana + links: + - prometheus:prometheus + name: grafana + published_ports: + - "3000:3000" + state: started + volumes: + - /data/grafana:/var/lib/grafana + + - name: Wait for grafana + wait_for: + port: 3000 + + # Port 3000 may be open before Grafana is ready, so retry a few times + - name: Check user + 
uri: + url: "{{ grafana_host }}/api/users/lookup?loginOrEmail=idr" + user: "{{ grafana_admin_user }}" + password: "{{ grafana_admin_password }}" + force_basic_auth: yes + status_code: [200, 404] + register: grafana_get_user + check_mode: no + until: grafana_get_user | succeeded + retries: 5 + delay: 5 + + - name: Create user + uri: + url: "{{ grafana_host }}/api/admin/users" + user: "{{ grafana_admin_user }}" + password: "{{ grafana_admin_password }}" + force_basic_auth: yes + method: POST + body: + name: idr + email: idr@openmicroscopy.org + login: idr + password: idr123 + body_format: json + status_code: 200 + register: grafana_create_user + when: grafana_get_user.status == 404 + + - name: Check datasource + uri: + url: "{{ grafana_host }}/api/datasources/name/prometheus" + user: "{{ grafana_admin_user }}" + password: "{{ grafana_admin_password }}" + force_basic_auth: yes + status_code: [200, 404] + register: grafana_get_datasource + check_mode: no + + - name: Create datasource + uri: + url: "{{ grafana_host }}/api/datasources" + user: "{{ grafana_admin_user }}" + password: "{{ grafana_admin_password }}" + force_basic_auth: yes + method: POST + body: + name: prometheus + type: prometheus + url: "http://prometheus:9090" + access: proxy + isDefault: true + body_format: json + status_code: 200 + when: grafana_get_datasource.status == 404 + + - name: Create dashboards directory + become: yes + file: + path: "{{ grafana_dashboard_json_dir }}" + recurse: yes + + - name: Copy dashboards + become: yes + copy: + src: grafana-dashboards/{{ item }}.json + dest: "{{ grafana_dashboard_json_dir }}/{{ item }}.json" + with_items: + - "{{ grafana_dashboard_list }}" + + - name: Check dashboard + uri: + url: "{{ grafana_host }}/api/dashboards/db/{{ item }}" + user: "{{ grafana_admin_user }}" + password: "{{ grafana_admin_password }}" + force_basic_auth: yes + status_code: [200, 404] + with_items: + - "{{ grafana_dashboard_list }}" + register: grafana_get_dashboard + check_mode: no 
+ + - name: Read dashboard json + slurp: + src: "{{ grafana_dashboard_json_dir }}/{{ item.item }}.json" + register: grafana_dashboard_json + when: item.status == 404 + with_items: + - "{{ grafana_get_dashboard.results }}" + + - name: Create dashboard + uri: + url: "{{ grafana_host }}/api/dashboards/db" + user: "{{ grafana_admin_user }}" + password: "{{ grafana_admin_password }}" + force_basic_auth: yes + method: POST + body: + dashboard: > + {{ item.content | + b64decode | + regex_replace('\$\{DS_PROMETHEUS\}', 'prometheus') | + from_json + }} + overwrite: True + body_format: json + status_code: 200 + when: item.content is defined + with_items: + - "{{ grafana_dashboard_json.results }}" + + vars: + grafana_dashboard_json_dir: /opt/grafana/dashboards + grafana_host: "http://localhost:3000" + grafana_admin_user: admin + grafana_admin_password: admin + grafana_dashboard_list: + - idr-per-server + - idr-sessions + - idr-vertical diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index 7bc1dfa7..564098f4 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -252,22 +252,3 @@ prometheus_rsync_banner_targets: - idr.openmicroscopy.org:873 - - -# Grafana -- hosts: "{{ idr_environment | default('idr') }}-management-hosts" - - tasks: - - - name: run grafana in docker - become: yes - docker_container: - image: grafana/grafana - links: - - prometheus:prometheus - name: grafana - published_ports: - - "3000:3000" - state: started - volumes: - - /data/grafana:/var/lib/grafana From 983757b4c2b771ee0f2b20c381ba9a4d9e777a4f Mon Sep 17 00:00:00 2001 From: Simon Li Date: Tue, 25 Jul 2017 11:35:24 +0100 Subject: [PATCH 20/30] Use omero.web.public.cache with 60s timeout --- ansible/group_vars/omero-hosts.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ansible/group_vars/omero-hosts.yml b/ansible/group_vars/omero-hosts.yml index 5216b501..3f0a1893 100644 --- a/ansible/group_vars/omero-hosts.yml +++ 
b/ansible/group_vars/omero-hosts.yml @@ -137,6 +137,12 @@ omero_web_config_set: omero.web.ui.metadata_panes: [] omero.web.wsgi_timeout: "{{ idr_omero_web_timeout | default(60) }}" +# TODO: This needs careful review of the code +# https://github.com/openmicroscopy/openmicroscopy/blob/v5.4.0-m2/components/tools/OmeroWeb/omeroweb/decorators.py#L305 + omero.web.public.cache.enabled: True + omero.web.public.cache.timeout: 60 + + ###################################################################### # Plugins and additional web configuration From 71d2bf6c721a3a8f1826f3ae12ca8cb2a07d579d Mon Sep 17 00:00:00 2001 From: Simon Li Date: Tue, 25 Jul 2017 11:38:18 +0100 Subject: [PATCH 21/30] Remove duplicate var --- ansible/group_vars/omero-hosts.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ansible/group_vars/omero-hosts.yml b/ansible/group_vars/omero-hosts.yml index 3f0a1893..fab56d12 100644 --- a/ansible/group_vars/omero-hosts.yml +++ b/ansible/group_vars/omero-hosts.yml @@ -12,7 +12,6 @@ idr_omero_ice_version: "3.6" idr_omero_upgrade: False # The IDR version system doesn't match the OMERO.server version omero_server_checkupgrade_comparator: '!=' -idr_omero_upgrade: False idr_omero_omego_additional_args: "--downloadurl https://downloads.openmicroscopy.org/idr" # If you want to speed up Ice installation use a precompiled IcePy: From 724d9ad1556295274c70c347cf6379352ea20523 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Tue, 25 Jul 2017 11:39:28 +0100 Subject: [PATCH 22/30] Sort omero_omeroreadonly_hosts --- ansible/group_vars/proxy-hosts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/group_vars/proxy-hosts.yml b/ansible/group_vars/proxy-hosts.yml index 2ee00198..159af9f1 100644 --- a/ansible/group_vars/proxy-hosts.yml +++ b/ansible/group_vars/proxy-hosts.yml @@ -27,7 +27,7 @@ nginx_proxy_websockets_enable: True nginx_proxy_upstream_servers: - name: omeroreadonly balance: ip_hash - servers: "{{ omero_omeroreadonly_hosts }}" + servers: "{{ 
omero_omeroreadonly_hosts | sort }}" - name: omeroreadwrite servers: "{{ omero_omeroreadwrite_hosts }}" From 924d179e7ccc7e95f6817053ba5201476157bc5b Mon Sep 17 00:00:00 2001 From: Simon Li Date: Tue, 25 Jul 2017 15:19:50 +0100 Subject: [PATCH 23/30] Increase nginx omero.web timeout on readwrite --- ansible/idr-omero.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ansible/idr-omero.yml b/ansible/idr-omero.yml index e0622c93..4a5fca05 100644 --- a/ansible/idr-omero.yml +++ b/ansible/idr-omero.yml @@ -73,3 +73,27 @@ # Vars are in group_vars/omero-hosts.yml environment: "{{ idr_ANSIBLE_ENVIRONMENT_VARIABLES | default({}) }}" + + +# TODO: Replace with a template using +# https://github.com/openmicroscopy/openmicroscopy/pull/5387 +- hosts: "{{ idr_environment | default('idr') }}-omeroreadwrite-hosts" + + tasks: + - name: Set Nginx proxy timeout + become: yes + lineinfile: + insertafter: 'server\s*{' + path: /etc/nginx/conf.d/omero-web.conf + line: proxy_read_timeout {{ idr_omero_web_timeout }}; + regexp: proxy_read_timeout\s+.* + state: present + notify: + - restart nginx + + handlers: + - name: restart nginx + become: yes + service: + name: nginx + state: restarted From c7b7a6b36991110366ff4a7eb370bccd36956d71 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 3 Aug 2017 10:31:18 +0100 Subject: [PATCH 24/30] Use a fixed version of prometheus-omero-py gist This avoids an ansible-lint error --- ansible/management-prometheus.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index 564098f4..27ef24bc 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -99,13 +99,15 @@ tasks: + # TODO: If we keep this move to a separate repo - name: prometheus-omero-py | install prometheus-omero-py become: yes git: dest: /opt/prometheus-omero-py/src force: yes repo: https://gist.github.com/238a63cfd9c2fb8a450252d79e609296.git -
version: HEAD + #version: HEAD + version: dbcb32b5bd212ff9dd1060808f2fa850194ab2d1 notify: - restart prometheus-omero-py From 26e302180fef7481c905b4eb651c34f9ed7879f8 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Fri, 4 Aug 2017 12:07:14 +0100 Subject: [PATCH 25/30] Use released nginx-proxy ansible role 1.5.1 --- ansible/requirements.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ansible/requirements.yml b/ansible/requirements.yml index caf63743..5526ebc5 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -66,11 +66,8 @@ - src: openmicroscopy.nginx version: 1.0.0 -#- src: openmicroscopy.nginx-proxy -# version: 1.3.0 -- name: openmicroscopy.nginx-proxy - src: https://github.com/manics/ansible-role-nginx-proxy/archive/nginx_proxy_block_locations.tar.gz - verison: nginx_proxy_block_locations +- src: openmicroscopy.nginx-proxy + version: 1.5.1 - src: openmicroscopy.nginx-ssl-selfsigned version: 1.0.0 From aa6eb7cc03ddebe2a222dc7a6e7ab44eeac6474a Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 10 Aug 2017 16:27:13 +0100 Subject: [PATCH 26/30] Use a copy of https://github.com/korfuri/django-prometheus/pull/46 --- ansible/management-prometheus.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index 27ef24bc..77e714da 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -35,7 +35,8 @@ become: yes pip: #name: django_prometheus - name: https://github.com/manics/django-prometheus/archive/1.0.9-ome1.zip + #name: https://github.com/manics/django-prometheus/archive/1.0.9-ome1.zip + name: https://github.com/manics/django-prometheus/archive/pr46.zip state: present #version: v1.0.8 virtualenv: "{{ omero_common_basedir }}/web/venv" From 2b27322aa71a84ea13caf2033402bc632556c7d1 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 10 Aug 2017 16:27:45 +0100 Subject: [PATCH 27/30] Add/bump ansible role 
requirements --- ansible/requirements.yml | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/ansible/requirements.yml b/ansible/requirements.yml index 5526ebc5..7da50d13 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -37,7 +37,7 @@ version: 2.0.1 - src: openmicroscopy.jekyll-build - version: 1.1.1 + version: 1.2.0 - src: openmicroscopy.local-accounts version: 1.0.1 @@ -99,8 +99,12 @@ - src: openmicroscopy.openstack-volume-storage version: 1.1.0 -- src: openmicroscopy.postgresql - version: 2.0.0 +# TODO: Update and tag +#- src: openmicroscopy.postgresql +# version: 2.0.0 +- name: openmicroscopy.postgresql + src: https://github.com/manics/ansible-role-postgresql/archive/pg-tune.tar.gz + version: pg-tune - src: openmicroscopy.python-pydata version: 1.0.0 @@ -172,9 +176,13 @@ # TODO: Update and tag - name: openmicroscopy.prometheus-jmx - src: https://github.com/manics/ansible-role-prometheus-jmx/archive/jmx_prometheus_javaagent_head.tar.gz - version: jmx_prometheus_javaagent_head + src: https://github.com/manics/ansible-role-prometheus-jmx/archive/jmx_prometheus_javaagent_0_10.tar.gz + version: jmx_prometheus_javaagent_0_10 - name: openmicroscopy.prometheus-node src: https://github.com/openmicroscopy/ansible-role-prometheus-node/archive/0.1.1.tar.gz version: 0.1.1 + +- name: openmicroscopy.prometheus-postgres + src: https://github.com/openmicroscopy/ansible-role-prometheus-postgres/archive/0.1.0.tar.gz + version: 0.1.0 From 029b6f9beb06a17fb8dd9b78436b30e419a29063 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 24 Aug 2017 11:45:29 +0100 Subject: [PATCH 28/30] Use IDR/django-prometheus fork --- ansible/management-prometheus.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index 77e714da..ae860581 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -35,10 +35,9 @@ become: yes 
pip: #name: django_prometheus - #name: https://github.com/manics/django-prometheus/archive/1.0.9-ome1.zip - name: https://github.com/manics/django-prometheus/archive/pr46.zip + name: https://github.com/IDR/django-prometheus/archive/v1.0.10-IDR1.zip state: present - #version: v1.0.8 + #version: v virtualenv: "{{ omero_common_basedir }}/web/venv" virtualenv_site_packages: yes notify: From efc545ad8473dd2e429ae99774dea9f459359eaa Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 24 Aug 2017 11:45:57 +0100 Subject: [PATCH 29/30] Bump requirements --- ansible/requirements.yml | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/ansible/requirements.yml b/ansible/requirements.yml index 7da50d13..13788031 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -99,12 +99,8 @@ - src: openmicroscopy.openstack-volume-storage version: 1.1.0 -# TODO: Update and tag -#- src: openmicroscopy.postgresql -# version: 2.0.0 -- name: openmicroscopy.postgresql - src: https://github.com/manics/ansible-role-postgresql/archive/pg-tune.tar.gz - version: pg-tune +- src: openmicroscopy.postgresql + version: 3.0.0-m1 - src: openmicroscopy.python-pydata version: 1.0.0 @@ -119,7 +115,7 @@ version: 1.0.0 - src: openmicroscopy.selinux-utils - version: 1.0.2 + version: 1.0.3 - src: openmicroscopy.storage-volume-initialise version: 1.0.0 @@ -169,15 +165,13 @@ src: https://github.com/openmicroscopy/ansible-role-docker-slack-notifier/archive/0.0.2.tar.gz version: 0.0.2 -# TODO: Update and tag - name: openmicroscopy.prometheus - src: https://github.com/manics/ansible-role-prometheus/archive/metrics_path.tar.gz - version: metrics_path + src: https://github.com/openmicroscopy/ansible-role-prometheus/archive/0.2.0.tar.gz + version: 0.2.0 -# TODO: Update and tag - name: openmicroscopy.prometheus-jmx - src: https://github.com/manics/ansible-role-prometheus-jmx/archive/jmx_prometheus_javaagent_0_10.tar.gz - version: jmx_prometheus_javaagent_0_10 + src: 
https://github.com/openmicroscopy/ansible-role-prometheus-jmx/archive/0.1.1.tar.gz + version: 0.1.1 - name: openmicroscopy.prometheus-node src: https://github.com/openmicroscopy/ansible-role-prometheus-node/archive/0.1.1.tar.gz From e6f2c286f224715fe100031094b7ebf812d7a49e Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 24 Aug 2017 12:35:53 +0100 Subject: [PATCH 30/30] Use IDR/omero-prometheus-tools instead of gist --- ansible/management-prometheus.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ansible/management-prometheus.yml b/ansible/management-prometheus.yml index ae860581..a3a12dc4 100644 --- a/ansible/management-prometheus.yml +++ b/ansible/management-prometheus.yml @@ -105,16 +105,15 @@ git: dest: /opt/prometheus-omero-py/src force: yes - repo: https://gist.github.com/238a63cfd9c2fb8a450252d79e609296.git - #version: HEAD - version: dbcb32b5bd212ff9dd1060808f2fa850194ab2d1 + repo: https://github.com/IDR/omero-prometheus-tools.git + version: 0.0.1 notify: - restart prometheus-omero-py - name: prometheus-omero-py | setup virtualenv become: yes pip: - requirements: /opt/prometheus-omero-py/src/requirements.txt + requirements: /opt/prometheus-omero-py/src/sessions/requirements.txt state: present virtualenv: /opt/prometheus-omero-py/venv virtualenv_site_packages: yes @@ -125,7 +124,7 @@ become: yes copy: dest: /etc/systemd/system/prometheus-omero-py.service - src: /opt/prometheus-omero-py/src/prometheus-omero-py-metrics.service + src: /opt/prometheus-omero-py/src/sessions/prometheus-omero-py-metrics.service remote_src: True notify: - restart prometheus-omero-py