-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathelasticsearch-rolling-upgrade.yml
218 lines (196 loc) · 8.1 KB
/
elasticsearch-rolling-upgrade.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# Ansible
#
# Rolling Upgrade of Elasticsearch with security on
# Source from: author: Jeff Steinmetz, @jeffsteinmetz; Bin Li, @holysoros
# Modifications: author: Daniel Neuberger @netways.de
# More modifications: NETWAYS Professional Services GmbH
# latest tested with Ansible 2.9 and later
---
# For now we support upgrade only for clusters with security enabled
# If you positively need support for safely upgrading clusters without security,
# feel free to open an issue at https://github.com/NETWAYS/ansible-collection-elasticstack/issues
- name: Set connection protocol to https
ansible.builtin.set_fact:
elasticsearch_http_protocol: "https"
- name: Check for running Elasticsearch service
ansible.builtin.systemd:
name: elasticsearch
register: elasticsearch_running
- name: Update stopped services right away
when:
- elasticsearch_running.status.ActiveState == "inactive"
block:
- name: Update stopped Elasticsearch - rpm with managed repositories
ansible.builtin.package:
name: "{{ elasticsearch_package }}"
enablerepo:
- 'elastic-{% if elasticstack_variant == "oss" %}oss-{% endif %}{{ elasticstack_release }}.x'
when:
- ansible_os_family == "RedHat"
- elasticstack_full_stack | bool
- name: Update stopped Elasticsearch - deb or unmanaged repositories rpm
ansible.builtin.package:
name: "{{ elasticsearch_package }}"
when:
- ansible_os_family == "Debian" or
not elasticstack_full_stack | bool
- name: Update single instances without extra caution
when:
- groups[elasticstack_elasticsearch_group_name] | length == 1
block:
- name: Update single instances without extra caution - deb or unmanaged repositories rpm
ansible.builtin.package:
name: "{{ elasticsearch_package }}"
when:
- ansible_os_family == "Debian" or
not elasticstack_full_stack | bool
notify:
- Restart Elasticsearch
- name: Update single instances without extra caution - rpm with managed repositories
ansible.builtin.package:
name: "{{ elasticsearch_package }}"
enablerepo:
- 'elastic-{% if elasticstack_variant == "oss" %}oss-{% endif %}{{ elasticstack_release }}.x'
when:
- ansible_os_family == "RedHat"
- elasticstack_full_stack | bool
notify:
- Restart Elasticsearch
- name: Be careful about upgrade when Elasticsearch is running
when:
- elasticsearch_running.status.ActiveState == "active"
- groups[elasticstack_elasticsearch_group_name] | length > 1
block:
# Usually we should not need this step. It's only there to recover from broken upgrade plays
# Without this step the cluster would never recover and the play would always fail
- name: Enable shard allocation for the cluster
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
method: PUT
body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
body_format: json
user: elastic
password: "{{ elasticstack_password.stdout }}"
validate_certs: no
register: response
# next line is boolean not string, so no quotes around true
# use python truthiness
until: "response.json.acknowledged == true"
retries: 5
delay: 30
# this step is key!!! Don't restart more nodes
# until all shards have completed recovery
- name: Wait for cluster health to return to green
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
method: GET
user: elastic
password: "{{ elasticstack_password.stdout }}"
validate_certs: no
register: response
until: "response.json.status == 'green'"
retries: 50
delay: 30
# Disabling shard allocation right after enabling it seems redundant. Please see above for details.
- name: Disable shard allocation for the cluster
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
method: PUT
body: '{ "persistent": { "cluster.routing.allocation.enable": "none" }}'
body_format: json
user: elastic
password: "{{ elasticstack_password.stdout }}"
validate_certs: no
- name: Stop non essential indexing to speed up shard recovery
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_flush"
method: POST
user: elastic
password: "{{ elasticstack_password.stdout }}"
validate_certs: no
failed_when: false
- name: Shutdown elasticsearch service
ansible.builtin.service:
name: elasticsearch
enabled: yes
state: stopped
when:
- not elasticsearch_unsafe_upgrade_restart | bool
- name: Update Elasticsearch - rpm with managed repositories
ansible.builtin.package:
name: "{{ elasticsearch_package }}"
enablerepo:
- 'elastic-{% if elasticstack_variant == "oss" %}oss-{% endif %}{{ elasticstack_release }}.x'
when:
- ansible_os_family == "RedHat"
- elasticstack_full_stack | bool
- name: Update Elasticsearch - deb or unmanaged repositories rpm
ansible.builtin.package:
name: "{{ elasticsearch_package }}"
when:
- ansible_os_family == "Debian" or
not elasticstack_full_stack | bool
- name: Start elasticsearch
ansible.builtin.service:
name: elasticsearch
enabled: yes
state: started
when:
- elasticsearch_running.status.ActiveState == "active"
- not elasticsearch_unsafe_upgrade_restart | bool
- name: Restart elasticsearch (fast, for non-prod)
ansible.builtin.service:
name: elasticsearch
enabled: yes
state: restarted
when:
- elasticsearch_running.status.ActiveState == "active"
- elasticsearch_unsafe_upgrade_restart | bool
- name: Wait for elasticsearch node to come back up if it was stopped
ansible.builtin.wait_for:
host: "{{ elasticsearch_api_host }}"
port: "{{ elasticstack_elasticsearch_http_port }}"
delay: 30
- name: Confirm the node joins the cluster # noqa: risky-shell-pipe
ansible.builtin.shell: >
if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
curl
-k
-u elastic:{{ elasticstack_password.stdout }}
-s
-m 2
'{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cat/nodes?h=name'
| grep
-E
'^{{ elasticsearch_nodename }}$'
register: result
until: result.rc == 0
retries: 200
delay: 3
changed_when: false
- name: Enable shard allocation for the cluster
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
method: PUT
body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
body_format: json
user: elastic
password: "{{ elasticstack_password.stdout }}"
validate_certs: no
register: response
# next line is boolean not string, so no quotes around true
# use python truthiness
until: "response.json.acknowledged == true"
retries: 5
delay: 30
- name: Wait for cluster health to return to yellow or green
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
method: GET
user: elastic
password: "{{ elasticstack_password.stdout }}"
validate_certs: no
register: response
until: "response.json.status == 'yellow' or response.json.status == 'green'"
retries: 5
delay: 30