|
1 | | -#!/bin/bash -eu |
| 1 | +#!/usr/bin/env bash |
2 | 2 |
|
3 | | -NODE_IP=<%= spec.ip %> |
4 | | -MYSQL_PORT=<%= p("cf_mysql.mysql.port") %> |
| 3 | +set -e -o pipefail |
5 | 4 |
|
6 | | -LOG_DIR="/var/vcap/sys/log/mysql/" |
| 5 | +<% |
| 6 | + require "shellwords" |
7 | 7 |
|
8 | | -# if the node ain't running, ain't got nothin' to drain |
9 | | -if ! ps -p $(</var/vcap/sys/run/mysql/mysql.pid) >/dev/null; then |
10 | | - echo "mysql is not running: drain OK" &>> "$LOG_DIR/drain.log" |
11 | | - echo 0; exit 0 # drain success |
12 | | -fi |
| 8 | + cluster_ips = link('mysql').instances.map(&:address) |
| 9 | + if_link('arbitrator') do |
| 10 | + cluster_ips += link('arbitrator').instances.map(&:address) |
| 11 | + end |
| 12 | +%> |
| 13 | + |
| 14 | +CLUSTER_NODES=(<%= cluster_ips.map{|e| Shellwords.escape e}.join(' ') %>) |
| 15 | +MYSQL_PORT=<%= Shellwords.escape p("cf_mysql.mysql.port") %> |
| 16 | + |
# Copy stdin to stdout, prefixing every line with "[<timestamp>] ".
# The timestamp is the output of `date +%FT%T%z` taken when the line is read.
# Written with shell builtins so it does not rely on the mawk-only
# `-W interactive` flag, nor on awk spawning an extra shell for every line.
# Inputs:  lines on stdin
# Outputs: "[timestamp] line" on stdout, one per input line
function prepend_datetime() {
  local line
  # `|| [[ -n ... ]]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    printf '[%s] %s\n' "$(date +%FT%T%z)" "$line"
  done
}
13 | 20 |
|
# Print the value of one wsrep status variable as reported by a node.
# Arguments: $1 - status variable name
#            $2 - host to query
# Globals:   MYSQL_PORT (read)
# Outputs:   second column of the SHOW STATUS result with newlines removed;
#            nothing (and status 0) when $1 does not match ^wsrep_[a-z_]+$
# Returns:   status of the query pipeline
function wsrep_var() {
  local status_name="$1"
  local node_host="$2"

  # The name is interpolated into the SQL text, so only plain wsrep_* names
  # are let through.
  [[ $status_name =~ ^wsrep_[a-z_]+$ ]] || return 0

  local -a query_cmd=(
    /usr/local/bin/mysql
    --defaults-file=/var/vcap/jobs/mysql/config/drain.cnf
    -h "$node_host"
    -P "$MYSQL_PORT"
    --execute="SHOW STATUS LIKE '$status_name'"
    -N
  )

  # The query is abandoned after 5 seconds.
  timeout 5 "${query_cmd[@]}" | awk '{print $2}' | tr -d '\n'
}
25 | 32 |
|
LOG_DIR="/var/vcap/sys/log/mysql"

# Keep the caller's stdout on fd 3: the script writes its numeric result there.
# From here on stdout and stderr are timestamped by prepend_datetime and
# appended to separate log files.
exec 3>&1
exec \
  1> >(prepend_datetime >> "$LOG_DIR/drain.out.log") \
  2> >(prepend_datetime >> "$LOG_DIR/drain.err.log")

# if the node ain't running, ain't got nothin' to drain
# A missing or empty pid file counts as "not running" without invoking ps.
PID_FILE="/var/vcap/sys/run/mysql/mysql.pid"
if [[ ! -s "$PID_FILE" ]] || ! ps -p "$(<"$PID_FILE")" >/dev/null; then
  echo "mysql is not running: drain OK"
  echo 0 >&3; exit 0 # drain success
fi
| 45 | + |
# Check each cluster node's availability.
# A node that does not accept a connection on the MySQL port within 5 seconds
# is skipped and left out of the checks below: it may have been deleted, or
# its mysql port may have been updated.
# Start from an empty list so a value inherited from the environment cannot
# leak into the set of nodes to test.
CLUSTER_TEST_NODES=()
for NODE in "${CLUSTER_NODES[@]}"; do
  if nc -zv -w 5 "$NODE" "$MYSQL_PORT"; then
    CLUSTER_TEST_NODES+=("$NODE")
  fi
done
27 | 54 |
|
# Check if all nodes are part of the PRIMARY component; if not then
# something is terribly wrong (loss of quorum or split-brain) and doing a
# rolling restart can actually cause data loss (e.g. if a node that is out
# of sync is used to bootstrap the cluster): in this case we fail immediately.
for TEST_NODE in "${CLUSTER_TEST_NODES[@]}"; do
  # A failed query is treated as "not Primary" so that the failure is logged
  # below instead of errexit/pipefail ending the script without a message.
  cluster_status=$(wsrep_var wsrep_cluster_status "$TEST_NODE") || cluster_status=""
  if [[ "$cluster_status" != "Primary" ]]; then
    echo "wsrep_cluster_status of node '$TEST_NODE' is '$cluster_status' (expected 'Primary'): drain failed"
    # Exit statuses are 0-255; use a plain 1 rather than -1.
    exit 1 # drain failed
  fi
done
41 | 66 |
|
|
44 | 69 | # Consider a 3 node cluster: if node1 is donor for node2 and we shut down node3 |
45 | 70 | # -that is synced- then node1 is joining, node2 is donor and node3 is down: as |
46 | 71 | # a result the cluster loses quorum until node1/node2 complete the transfer!) |
# Every tested node must report 'Synced'; the first one that does not makes
# the script write -5 to fd 3 and stop with status 0.
for candidate in "${CLUSTER_TEST_NODES[@]}"; do
  local_state=$(wsrep_var wsrep_local_state_comment "$candidate")
  case "$local_state" in
    Synced)
      ;;
    *)
      echo "wsrep_local_state_comment of node '$candidate' is '$local_state' (expected 'Synced'): retry drain in 5 seconds"
      # TODO: rewrite to avoid using dynamic drain (soon to be deprecated)
      echo -5 >&3
      exit 0 # retry in 5 seconds
      ;;
  esac
done
57 | 80 |
|
# Nothing stopped the drain: log it, then write the result 0 to fd 3.
printf '%s\n' "Drain Success"
printf '%s\n' 0 >&3
exit 0 # drain success
0 commit comments