23 changes: 15 additions & 8 deletions docs/getting-started.md
@@ -51,20 +51,27 @@ You may want to add worker, master or etcd nodes to your existing cluster. This
Remove nodes
------------

You may want to remove **worker** nodes to your existing cluster. This can be done by re-running the `remove-node.yml` playbook. First, all nodes will be drained, then stop some kubernetes services and delete some certificates, and finally execute the kubectl command to delete these nodes. This can be combined with the add node function, This is generally helpful when doing something like autoscaling your clusters. Of course if a node is not working, you can remove the node and install it again.

Add worker nodes to the list under kube-node if you want to delete them (or utilize a [dynamic inventory](https://docs.ansible.com/ansible/intro_dynamic_inventory.html)).

ansible-playbook -i inventory/mycluster/hosts.yml remove-node.yml -b -v \
--private-key=~/.ssh/private_key

Use `--extra-vars "node=<nodename>,<nodename2>"` to select the node you want to delete.
You may want to remove **master**, **worker**, or **etcd** nodes from your
existing cluster. This can be done by re-running the `remove-node.yml`
playbook. First, all specified nodes are drained, then some Kubernetes
services are stopped and some certificates are deleted, and finally kubectl
is run to delete these nodes. This can be combined with the add-node
function; this is generally helpful when doing something like autoscaling
your clusters. Of course, if a node is not working, you can remove the node
and install it again.

Use `--extra-vars "node=<nodename>,<nodename2>"` to select the node(s) you want to delete.
```
ansible-playbook -i inventory/mycluster/hosts.yml remove-node.yml -b -v \
--private-key=~/.ssh/private_key \
--extra-vars "node=nodename,nodename2"
```

If a node is completely unreachable by SSH, add `--extra-vars reset_nodes=no`
to skip the node reset step. If one node is unavailable, but the other nodes
you wish to remove can be reached via SSH, you could instead set
`reset_nodes=no` as a host var in the inventory for the unreachable node only.
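
As an illustration, here is a minimal inventory sketch for that case; the host
name `node2` and the layout are hypothetical, following the YAML inventory
format used with `-i inventory/mycluster/hosts.yml`:

```
# hypothetical snippet of inventory/mycluster/hosts.yml
# node2 is unreachable over SSH, so skip the reset step for it only
all:
  hosts:
    node2:
      reset_nodes: false
```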

Connecting to Kubernetes
------------------------

15 changes: 8 additions & 7 deletions remove-node.yml
@@ -1,6 +1,7 @@
---
- hosts: localhost
become: no
gather_facts: no
tasks:
- name: "Check ansible version >=2.7.8"
assert:
@@ -12,12 +13,8 @@
vars:
ansible_connection: local

- hosts: all
vars:
ansible_ssh_pipelining: true
gather_facts: true

- hosts: "{{ node | default('etcd:k8s-cluster:calico-rr') }}"
gather_facts: no
vars_prompt:
name: "delete_nodes_confirmation"
prompt: "Are you sure you want to delete nodes state? Type 'yes' to delete nodes."
@@ -31,16 +28,20 @@
when: delete_nodes_confirmation != "yes"

- hosts: kube-master
gather_facts: no
roles:
- { role: kubespray-defaults }
- { role: remove-node/pre-remove, tags: pre-remove }

- hosts: "{{ node | default('kube-node') }}"
gather_facts: no
roles:
- { role: kubespray-defaults }
- { role: reset, tags: reset }
- { role: reset, tags: reset, when: reset_nodes|default(True) }

- hosts: kube-master
# Currently cannot remove first master or etcd
- hosts: "{{ node | default('kube-master[1:]:etcd[:1]') }}"
gather_facts: no
roles:
- { role: kubespray-defaults }
- { role: remove-node/post-remove, tags: post-remove }
51 changes: 48 additions & 3 deletions roles/remove-node/post-remove/tasks/main.yml
@@ -1,9 +1,54 @@
---
- name: Lookup node IP in kubernetes
shell: >-
{{ bin_dir }}/kubectl get nodes {{ node }}
-o jsonpath='{range.status.addresses[?(@.type=="InternalIP")]}{.address}{"\n"}{end}'
register: remove_node_ip
when:
- inventory_hostname in groups['etcd']
- ip is not defined
- access_ip is not defined
delegate_to: "{{ groups['etcd']|first }}"
failed_when: false

- name: Set node IP
set_fact:
node_ip: "{{ ip | default(access_ip | default(remove_node_ip.stdout)) | trim }}"

- name: Delete node
command: "{{ bin_dir }}/kubectl delete node {{ item }}"
with_items:
- "{{ node.split(',') | default(groups['kube-node']) }}"
command: "{{ bin_dir }}/kubectl delete node {{ inventory_hostname }}"
delegate_to: "{{ groups['kube-master']|first }}"
run_once: true
ignore_errors: yes

- name: Lookup etcd member id
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep {{ node_ip }} | cut -d: -f1"
register: etcd_member_id
ignore_errors: true
changed_when: false
check_mode: no
tags:
- facts
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}-key.pem"
ETCDCTL_CA_FILE: "{{ etcd_cert_dir }}/ca.pem"
delegate_to: "{{ groups['etcd']|first }}"
when: inventory_hostname in groups['etcd']

- name: Remove etcd member from cluster
Review comment from @riverzhang (Contributor, Aug 6, 2019):
With a 3-node etcd cluster, removing 2 etcd nodes makes the Kubernetes cluster
unavailable, so the documentation should warn users against deleting more than
half of the etcd members.

Reply from the Author (Contributor):
It will actually work if you remove node 3 and then node 2; the cluster scales
down to 1 member, and you could rebuild from that.

shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member remove {{ etcd_member_id.stdout }}"
register: etcd_member_in_cluster
ignore_errors: true
changed_when: false
check_mode: no
tags:
- facts
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}-key.pem"
ETCDCTL_CA_FILE: "{{ etcd_cert_dir }}/ca.pem"
delegate_to: "{{ groups['etcd']|first }}"
when:
- inventory_hostname in groups['etcd']
- not etcd_member_id.stdout
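
For reference, the two etcd tasks above boil down to commands you could run by
hand on the first etcd host. This is a rough sketch only: the endpoint,
certificate paths, and the removed node's IP are placeholders, and the flags
mirror the v2 `etcdctl` invocation used in the playbook.

```
# placeholders: adjust the endpoint, certificate paths, and the node's InternalIP
export ETCDCTL_CA_FILE=/etc/ssl/etcd/ssl/ca.pem
export ETCDCTL_CERT_FILE=/etc/ssl/etcd/ssl/admin-node1.pem
export ETCDCTL_KEY_FILE=/etc/ssl/etcd/ssl/admin-node1-key.pem

# find the member id of the node being removed (match on its InternalIP)
MEMBER_ID=$(etcdctl --no-sync --endpoints=https://10.0.0.1:2379 member list \
  | grep 10.0.0.3 | cut -d: -f1)

# drop that member from the etcd cluster
etcdctl --no-sync --endpoints=https://10.0.0.1:2379 member remove "$MEMBER_ID"
```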