From e281afc3dce2cd59a18e9594a0430773d7aaed06 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Thu, 1 May 2025 21:17:50 +0100 Subject: [PATCH 01/12] Deploy multi-data source searchengine --- ansible/decommission/archive-logs.yml | 4 +- ansible/group_vars/searchengine-hosts.yml | 5 ++- ansible/idr-searchengine.yml | 50 ++++++++++++++++++++--- 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/ansible/decommission/archive-logs.yml b/ansible/decommission/archive-logs.yml index cbc43dd3..45a8c19f 100644 --- a/ansible/decommission/archive-logs.yml +++ b/ansible/decommission/archive-logs.yml @@ -10,7 +10,6 @@ set_fact: decommission_archive_prefix: "{{ ansible_hostname }}-{{ ansible_date_time.date | replace('-', '') }}" - - hosts: "{{ idr_environment | default('idr') }}-proxy-hosts" vars: logs: @@ -42,7 +41,6 @@ from {{ ansible_hostname }}:{{ item.expanded_paths | join(',') }} with_items: "{{_decommission_archive_proxy.results}}" - - hosts: "{{ idr_environment | default('idr') }}-management-hosts" vars: logs: @@ -74,11 +72,11 @@ from {{ ansible_hostname }}:{{ item.expanded_paths | join(',') }} with_items: "{{_decommission_archive_management.results}}" - - hosts: "{{ idr_environment | default('idr') }}-searchengine-hosts" vars: logs: - { name: 'searchengine', path: '/data/searchengine/searchengine/logs' } + - {"name": "searchengine_backup_data/", path: "/searchengine_backup/backup_data"} tasks: - name: Archive searchengine logs diff --git a/ansible/group_vars/searchengine-hosts.yml b/ansible/group_vars/searchengine-hosts.yml index 089d8aeb..4f3636a9 100644 --- a/ansible/group_vars/searchengine-hosts.yml +++ b/ansible/group_vars/searchengine-hosts.yml @@ -2,13 +2,16 @@ apps_folder: /data #database_server_url: "{{ omero_db_host_ansible }}" database_port: 5432 database_name: idr +default_datasource: idr database_username: omeroreadonly database_user_password: "{{ idr_secret_postgresql_password_ro | default('omero') }}" searchenginecache_folder: 
/data/searchengine/searchengine/cacheddata/ search_engineelasticsearch_docker_image: docker.elastic.co/elasticsearch/elasticsearch:8.8.1 -searchengine_docker_image: openmicroscopy/omero-searchengine:0.6.0 +searchengine_docker_image: khaledk2/searchengine:multisource +# openmicroscopy/omero-searchengine:0.6.0 #ansible_python_interpreter: path/to/bin/python searchengine_index: searchengine_index +automatic_refresh: true cache_rows: 100000 # I think that the following two variables should be in secret searchengine_secret_key: "{{ idr_searchengine_secret_key | default('secret_key') }}" diff --git a/ansible/idr-searchengine.yml b/ansible/idr-searchengine.yml index 8c4f0b33..7e7a7453 100644 --- a/ansible/idr-searchengine.yml +++ b/ansible/idr-searchengine.yml @@ -68,7 +68,6 @@ volumes: - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" - - name: configure database for docker searchengine become: yes docker_container: @@ -77,9 +76,8 @@ cleanup: True #auto_remove: yes command: > - set_database_configuration -u {{ database_server_url }} - -d {{ database_name }} -s {{ database_port }} -n {{ database_username }} -p {{ database_user_password }} - #networks: + set_database_configuration -d {{ database_name }} -s {{ database_port }} -n {{ database_username }} -p {{ database_user_password }} -b {{ apps_folder }} -w {{ default_datasource }} -u {{ database_server_url }} + #networks:: #- name: searchengine-net #published_ports: #- "5577:5577" @@ -87,6 +85,19 @@ volumes: - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" + - name: set default data source + become: yes + docker_container: + image: "{{ searchengine_docker_image }}" + name: searchengine_database + cleanup: True + #auto_remove: yes + command: > + set_default_datasource -d {{ default_datasource }} + state: started + volumes: + - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" + - name: configure IDR_TEST_FILE_URL item become: yes docker_container: @@ -98,7 +109,6 @@ 
volumes: - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" - - name: configure cache folder for docker searchengine become: yes docker_container: @@ -151,6 +161,34 @@ volumes: - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" +# automatic refreshing when configuration change is detected + + - name: Set automatic refreshing the configuration + become: yes + docker_container: + image: "{{ searchengine_docker_image }}" + name: elastics_password + cleanup: True + #auto_remove: yes + command: "set_automatic_refresh -a {{ automatic_refresh }}" + state: started + volumes: + - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" + +# restore the elastic indexed data from the backup + + - name: Restore the backup elastic search indexed data + become: yes + docker_container: + image: "{{ searchengine_docker_image }}" + name: elastics_password + cleanup: True + #auto_remove: yes + command: "restore_elasticsearch_data" + state: started + volumes: + \q- "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" + - name: Run docker searchengine become: yes docker_container: @@ -167,4 +205,4 @@ restart_policy: always volumes: - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" - - "{{ apps_folder }}/searchengine/searchengine/cachedata:/etc/searchengine/cachedata" + - "{{ apps_folder }}/searchengine/searchengine/cachedata:/etc/searchengine/cachedata" \ No newline at end of file From 9635253f8cbcf7862e00a9c5bb9be073665c5adc Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Sun, 11 May 2025 20:32:47 +0100 Subject: [PATCH 02/12] Add comments to the playbooks --- ansible/decommission/archive-logs.yml | 3 ++- ansible/idr-searchengine.yml | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ansible/decommission/archive-logs.yml b/ansible/decommission/archive-logs.yml index 45a8c19f..4d821e3b 100644 --- a/ansible/decommission/archive-logs.yml +++ b/ansible/decommission/archive-logs.yml @@ -76,7 +76,8 
@@ vars: logs: - { name: 'searchengine', path: '/data/searchengine/searchengine/logs' } - - {"name": "searchengine_backup_data/", path: "/searchengine_backup/backup_data"} + # We may move archive the searchengine indexed data snapshot to a separate playbook + - {"name": "searchengine_backup_data/", path: "/searchengine_backup"} tasks: - name: Archive searchengine logs diff --git a/ansible/idr-searchengine.yml b/ansible/idr-searchengine.yml index 7e7a7453..1bff8193 100644 --- a/ansible/idr-searchengine.yml +++ b/ansible/idr-searchengine.yml @@ -176,7 +176,8 @@ - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" # restore the elastic indexed data from the backup - + # This required that the indexed data snapshot be copied to the + # backup folder before running the elasticsearch deployment playbook - name: Restore the backup elastic search indexed data become: yes docker_container: @@ -187,7 +188,7 @@ command: "restore_elasticsearch_data" state: started volumes: - \q- "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" + - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" - name: Run docker searchengine become: yes From bf3e6535c277faef643a65e7b95c5e54fa80793d Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 12 May 2025 09:04:41 +0100 Subject: [PATCH 03/12] Add comment --- ansible/group_vars/searchengine-hosts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/group_vars/searchengine-hosts.yml b/ansible/group_vars/searchengine-hosts.yml index 4f3636a9..8002df41 100644 --- a/ansible/group_vars/searchengine-hosts.yml +++ b/ansible/group_vars/searchengine-hosts.yml @@ -8,7 +8,7 @@ database_user_password: "{{ idr_secret_postgresql_password_ro | default('omero') searchenginecache_folder: /data/searchengine/searchengine/cacheddata/ search_engineelasticsearch_docker_image: docker.elastic.co/elasticsearch/elasticsearch:8.8.1 searchengine_docker_image: khaledk2/searchengine:multisource -# 
openmicroscopy/omero-searchengine:0.6.0 +# The search engine image should be replaced by openmicroscopy after release it #ansible_python_interpreter: path/to/bin/python searchengine_index: searchengine_index automatic_refresh: true From 44d6f3f8dd6b3f16fab488207c87d77175c46c03 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Fri, 16 May 2025 17:59:38 +0100 Subject: [PATCH 04/12] Update archive-logs.yml --- ansible/decommission/archive-logs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/decommission/archive-logs.yml b/ansible/decommission/archive-logs.yml index 4d821e3b..b977ef6a 100644 --- a/ansible/decommission/archive-logs.yml +++ b/ansible/decommission/archive-logs.yml @@ -77,7 +77,7 @@ logs: - { name: 'searchengine', path: '/data/searchengine/searchengine/logs' } # We may move archive the searchengine indexed data snapshot to a separate playbook - - {"name": "searchengine_backup_data/", path: "/searchengine_backup"} + - {"name": "searchengine_backup_data", path: "/searchengine_backup"} tasks: - name: Archive searchengine logs From 35a7d55da651f2a952f5cdb634f5344325e316a3 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 19 May 2025 15:57:00 +0100 Subject: [PATCH 05/12] Remove elasticsearch_backup_folder folder creation --- ansible/group_vars/searchengine-hosts.yml | 1 + ansible/idr-elasticsearch.yml | 12 ------------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/ansible/group_vars/searchengine-hosts.yml b/ansible/group_vars/searchengine-hosts.yml index 8002df41..6938bf67 100644 --- a/ansible/group_vars/searchengine-hosts.yml +++ b/ansible/group_vars/searchengine-hosts.yml @@ -18,6 +18,7 @@ searchengine_secret_key: "{{ idr_searchengine_secret_key | default('secret_key') searchengineurlprefix: "searchengine" IDR_TEST_FILE_URL: "https://raw.githubusercontent.com/IDR/idr.openmicroscopy.org/master/_data/studies.tsv" elasticsearch_no_nodes: 3 +# This should be a separate volume, cloned as part of the routine deployment 
process. elasticsearch_backup_folder: "/searchengine_backup" ca_password: "{{ idr_secret_elastic_ca_password | default('ca_password') }}" keystore_password: "{{ idr_secret_elastic_keystore_password | default('keystore_password') }}" diff --git a/ansible/idr-elasticsearch.yml b/ansible/idr-elasticsearch.yml index 4eea0ac5..63532dbc 100644 --- a/ansible/idr-elasticsearch.yml +++ b/ansible/idr-elasticsearch.yml @@ -73,18 +73,6 @@ mode: 0755 with_sequence: start=1 count={{ elasticsearch_no_nodes }} - - name: Create ELasticsearch backup folder - become: yes - file: - path: "{{ elasticsearch_backup_folder }}" - recurse: yes - state: directory - owner: 1000 - group: root - mode: 0755 - - - - name: Add elastic nodes to instances_nodes set_fact: instances_nodes: "{{instances_nodes + [( {'name' : 'searchengine_elasticsearch_node'+item, 'dns': ['searchengine_elasticsearch_node'+item,'localhost'],'ip': '127.0.0.1'})] }}" From 9bdb4f34299b0408b0fed155ecb032caa097a4b1 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 9 Jun 2025 10:44:20 +0100 Subject: [PATCH 06/12] change to use openmicroscopy image --- ansible/group_vars/searchengine-hosts.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ansible/group_vars/searchengine-hosts.yml b/ansible/group_vars/searchengine-hosts.yml index 6938bf67..ff448fff 100644 --- a/ansible/group_vars/searchengine-hosts.yml +++ b/ansible/group_vars/searchengine-hosts.yml @@ -7,9 +7,8 @@ database_username: omeroreadonly database_user_password: "{{ idr_secret_postgresql_password_ro | default('omero') }}" searchenginecache_folder: /data/searchengine/searchengine/cacheddata/ search_engineelasticsearch_docker_image: docker.elastic.co/elasticsearch/elasticsearch:8.8.1 -searchengine_docker_image: khaledk2/searchengine:multisource -# The search engine image should be replaced by openmicroscopy after release it -#ansible_python_interpreter: path/to/bin/python +searchengine_docker_image: openmicroscopy/omero-searchengine:0.7 
+#ansible_python_interpreter: path/to/bin/python` searchengine_index: searchengine_index automatic_refresh: true cache_rows: 100000 From 664cca816c55ecae8251aae812d795d26e3bd255 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 9 Jun 2025 11:45:20 +0100 Subject: [PATCH 07/12] create volume for searchengine backup --- ansible/openstack-create-publicidr.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ansible/openstack-create-publicidr.yml b/ansible/openstack-create-publicidr.yml index b312b659..cf223a1b 100644 --- a/ansible/openstack-create-publicidr.yml +++ b/ansible/openstack-create-publicidr.yml @@ -169,3 +169,11 @@ openstack_volume_name: "{{ idr_environment_idr }}-searchengine-data" openstack_volume_device: /dev/vdb openstack_volume_type: "{{ idr_volume_searchengine_data_type | default(omit) }}" + + #searchengine searchengine_backup volume + - role: ome.openstack_volume_storage + openstack_volume_size: 25 + openstack_volume_vmname: "{{ idr_environment_idr }}-searchengine" + openstack_volume_name: "{{ idr_environment_idr }}-searchengine-searchengine_backup" + openstack_volume_device: /dev/vdb + openstack_volume_type: "{{ idr_volume_searchengine_data_type | default(omit) }}" From e6103f90dbae8916245c43dc70785db704d958fc Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 9 Jun 2025 11:49:56 +0100 Subject: [PATCH 08/12] snapshot for searchengine_backup --- scripts/os-idr-snapshot.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/os-idr-snapshot.sh b/scripts/os-idr-snapshot.sh index 9b9d321b..e7a72c21 100755 --- a/scripts/os-idr-snapshot.sh +++ b/scripts/os-idr-snapshot.sh @@ -18,6 +18,7 @@ for vol in \ database-db \ omeroreadwrite-data \ proxy-nginxcache \ + searchengine-searchengine_backup \ ; do volume="$vm_prefix-$vol" echo "Snapshotting volume $volume" From 09f1da52df5867c7c1bba1aeb2011011216d45b5 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Tue, 10 Jun 2025 10:45:12 +0100 Subject: [PATCH 09/12] add restore and backup searchengine data 
playbooks and address review issues --- ansible/backup_searchengine_data.yml | 18 ++++++++++++++++++ ansible/group_vars/searchengine-hosts.yml | 2 +- ansible/idr-elasticsearch.yml | 4 ++-- ansible/idr-searchengine.yml | 17 +---------------- ansible/restore_searchengine_data.yml | 18 ++++++++++++++++++ scripts/os-idr-snapshot.sh | 2 +- 6 files changed, 41 insertions(+), 20 deletions(-) create mode 100644 ansible/backup_searchengine_data.yml create mode 100644 ansible/restore_searchengine_data.yml diff --git a/ansible/backup_searchengine_data.yml b/ansible/backup_searchengine_data.yml new file mode 100644 index 00000000..45b3273b --- /dev/null +++ b/ansible/backup_searchengine_data.yml @@ -0,0 +1,18 @@ +- name: Backup searchengine data + hosts: "{{ idr_environment | default('idr') }}-searchengine-hosts" + + tasks: + # Backup the elastic indexed data + - name: backup elastic search indexed data + become: yes + docker_container: + image: "{{ searchengine_docker_image }}" + name: restore_elasticsearch_data + cleanup: True + #auto_remove: yes + networks: + - name: searchengine-net + command: "backup_elasticsearch_data" + state: started + volumes: + - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" diff --git a/ansible/group_vars/searchengine-hosts.yml b/ansible/group_vars/searchengine-hosts.yml index ff448fff..01404220 100644 --- a/ansible/group_vars/searchengine-hosts.yml +++ b/ansible/group_vars/searchengine-hosts.yml @@ -8,7 +8,7 @@ database_user_password: "{{ idr_secret_postgresql_password_ro | default('omero') searchenginecache_folder: /data/searchengine/searchengine/cacheddata/ search_engineelasticsearch_docker_image: docker.elastic.co/elasticsearch/elasticsearch:8.8.1 searchengine_docker_image: openmicroscopy/omero-searchengine:0.7 -#ansible_python_interpreter: path/to/bin/python` +# ansible_python_interpreter: path/to/bin/python searchengine_index: searchengine_index automatic_refresh: true cache_rows: 100000 diff --git 
a/ansible/idr-elasticsearch.yml b/ansible/idr-elasticsearch.yml index 63532dbc..d312b4e2 100644 --- a/ansible/idr-elasticsearch.yml +++ b/ansible/idr-elasticsearch.yml @@ -118,7 +118,7 @@ #fi; state: started volumes: - - "{{ apps_folder }}/searchengine/elasticsearch/certs: /certs" + - "{{ apps_folder }}/searchengine/elasticsearch/certs:/certs" - "{{ apps_folder }}/searchengine/elasticsearch/certs:/usr/share/elasticsearch/config/certificates" - name: Wait for CA file @@ -151,7 +151,7 @@ state: started volumes: - - "{{ apps_folder }}/searchengine/elasticsearch/certs: /certs" + - "{{ apps_folder }}/searchengine/elasticsearch/certs:/certs" - "{{ apps_folder }}/searchengine/elasticsearch/certs:/usr/share/elasticsearch/config/certificates" - /tmp/instances.yaml:/tmp/instances.yaml diff --git a/ansible/idr-searchengine.yml b/ansible/idr-searchengine.yml index 1bff8193..8009863d 100644 --- a/ansible/idr-searchengine.yml +++ b/ansible/idr-searchengine.yml @@ -175,21 +175,6 @@ volumes: - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" -# restore the elastic indexed data from the backup - # This required that the indexed data snapshot be copied to the - # backup folder before running the elasticsearch deployment playbook - - name: Restore the backup elastic search indexed data - become: yes - docker_container: - image: "{{ searchengine_docker_image }}" - name: elastics_password - cleanup: True - #auto_remove: yes - command: "restore_elasticsearch_data" - state: started - volumes: - - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" - - name: Run docker searchengine become: yes docker_container: @@ -206,4 +191,4 @@ restart_policy: always volumes: - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" - - "{{ apps_folder }}/searchengine/searchengine/cachedata:/etc/searchengine/cachedata" \ No newline at end of file + - "{{ apps_folder }}/searchengine/searchengine/cachedata:/etc/searchengine/cachedata" diff --git 
a/ansible/restore_searchengine_data.yml b/ansible/restore_searchengine_data.yml new file mode 100644 index 00000000..6fcee6b8 --- /dev/null +++ b/ansible/restore_searchengine_data.yml @@ -0,0 +1,18 @@ +- name: Restore searchengine data + hosts: "{{ idr_environment | default('idr') }}-searchengine-hosts" + + tasks: + # restore the elastic indexed data from the backup + - name: Restore the backup elastic search indexed data + become: yes + docker_container: + image: "{{ searchengine_docker_image }}" + name: restore_elasticsearch_data + cleanup: True + #auto_remove: yes + networks: + - name: searchengine-net + command: "restore_elasticsearch_data" + state: started + volumes: + - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" diff --git a/scripts/os-idr-snapshot.sh b/scripts/os-idr-snapshot.sh index e7a72c21..c499a958 100755 --- a/scripts/os-idr-snapshot.sh +++ b/scripts/os-idr-snapshot.sh @@ -18,7 +18,7 @@ for vol in \ database-db \ omeroreadwrite-data \ proxy-nginxcache \ - searchengine-searchengine_backup \ + searchengine_backup \ ; do volume="$vm_prefix-$vol" echo "Snapshotting volume $volume" From 871706874f49b76fa65d716631f267d462704515 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Tue, 10 Jun 2025 11:41:28 +0100 Subject: [PATCH 10/12] remove comment --- ansible/decommission/archive-logs.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ansible/decommission/archive-logs.yml b/ansible/decommission/archive-logs.yml index b977ef6a..4e5c8e8f 100644 --- a/ansible/decommission/archive-logs.yml +++ b/ansible/decommission/archive-logs.yml @@ -76,7 +76,6 @@ vars: logs: - { name: 'searchengine', path: '/data/searchengine/searchengine/logs' } - # We may move archive the searchengine indexed data snapshot to a separate playbook - {"name": "searchengine_backup_data", path: "/searchengine_backup"} tasks: From 43b4a670c13bccf2011d3b4fd5278dcb35da4b3e Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Tue, 10 Jun 2025 11:57:30 +0100 Subject: [PATCH 11/12] Verify 
whether the snapshot exists before restore --- ansible/restore_searchengine_data.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ansible/restore_searchengine_data.yml b/ansible/restore_searchengine_data.yml index 6fcee6b8..9b86186e 100644 --- a/ansible/restore_searchengine_data.yml +++ b/ansible/restore_searchengine_data.yml @@ -2,6 +2,11 @@ hosts: "{{ idr_environment | default('idr') }}-searchengine-hosts" tasks: + - name: Verify whether the snapshot exists + stat: + path: "{{ elasticsearch_backup_folder }}/backup_data" + register: snapshot + # restore the elastic indexed data from the backup - name: Restore the backup elastic search indexed data become: yes @@ -16,3 +21,4 @@ state: started volumes: - "{{ apps_folder }}/searchengine/searchengine/:/etc/searchengine/" + when: snapshot.stat.exists == True From 143451a555e081953d25b30cf51fe026e90354d8 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Tue, 10 Jun 2025 13:23:50 +0100 Subject: [PATCH 12/12] Run restore_searchengine_data automatically --- ansible/idr-02-services.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ansible/idr-02-services.yml b/ansible/idr-02-services.yml index 8e4ab1a5..a0c11b59 100644 --- a/ansible/idr-02-services.yml +++ b/ansible/idr-02-services.yml @@ -9,3 +9,6 @@ ## Search services - import_playbook: idr-elasticsearch.yml - import_playbook: idr-searchengine.yml + +## Restore searchengine data if it exists +- import_playbook: restore_searchengine_data.yml