From cf662ed74b895373f85de2ff03820f364045122b Mon Sep 17 00:00:00 2001 From: Jordan Borean Date: Tue, 21 Nov 2017 10:01:22 +1000 Subject: [PATCH] win_reboot: change to sample system uptime instead of checking port status (#31767) * win_reboot: change to sample system uptime instead of checking port status * added connection timeout back in as now we can manually set it per connection. * some pep8 fixes * fix up error message on timeout in case an exception wasn't fired * Changed doc to English (US) and simplified uptime check * moved conn timeout over to new config connection options --- lib/ansible/modules/windows/win_reboot.py | 2 +- lib/ansible/plugins/action/win_reboot.py | 109 ++++++++++++++-------- lib/ansible/plugins/connection/winrm.py | 18 +++- 3 files changed, 89 insertions(+), 40 deletions(-) diff --git a/lib/ansible/modules/windows/win_reboot.py b/lib/ansible/modules/windows/win_reboot.py index c15a386df7..64e6018dfb 100644 --- a/lib/ansible/modules/windows/win_reboot.py +++ b/lib/ansible/modules/windows/win_reboot.py @@ -31,6 +31,7 @@ options: description: - Maximum seconds to wait for shutdown to occur - Increase this timeout for very slow hardware, large update applications, etc + - This option has been removed since Ansible 2.5 as the win_reboot behavior has changed default: 600 aliases: [ shutdown_timeout_sec ] reboot_timeout: @@ -70,7 +71,6 @@ EXAMPLES = r''' # Reboot a slow machine that might have lots of updates to apply - win_reboot: - shutdown_timeout: 3600 reboot_timeout: 3600 ''' diff --git a/lib/ansible/plugins/action/win_reboot.py b/lib/ansible/plugins/action/win_reboot.py index 70cdc2fc0b..e2f231eee5 100644 --- a/lib/ansible/plugins/action/win_reboot.py +++ b/lib/ansible/plugins/action/win_reboot.py @@ -4,12 +4,13 @@ from __future__ import (absolute_import, division, print_function) __metaclass__ = type -import socket import time from datetime import datetime, timedelta +from ansible.errors import AnsibleError from ansible.plugins.action 
import ActionBase +from ansible.module_utils._text import to_native try: from __main__ import display @@ -25,7 +26,6 @@ class TimedOutException(Exception): class ActionModule(ActionBase): TRANSFERS_FILES = False - DEFAULT_SHUTDOWN_TIMEOUT = 600 DEFAULT_REBOOT_TIMEOUT = 600 DEFAULT_CONNECT_TIMEOUT = 5 DEFAULT_PRE_REBOOT_DELAY = 2 @@ -33,10 +33,20 @@ class ActionModule(ActionBase): DEFAULT_TEST_COMMAND = 'whoami' DEFAULT_REBOOT_MESSAGE = 'Reboot initiated by Ansible.' + def get_system_uptime(self): + uptime_command = "(Get-WmiObject -ClassName Win32_OperatingSystem).LastBootUpTime" + (rc, stdout, stderr) = self._connection.exec_command(uptime_command) + + if rc != 0: + raise Exception("win_reboot: failed to get host uptime info, rc: %d, stdout: %s, stderr: %s" + % (rc, stdout, stderr)) + + return stdout + def do_until_success_or_timeout(self, what, timeout, what_desc, fail_sleep=1): max_end_time = datetime.utcnow() + timedelta(seconds=timeout) - e = None + exc = "" while datetime.utcnow() < max_end_time: try: what() @@ -44,11 +54,12 @@ class ActionModule(ActionBase): display.debug("win_reboot: %s success" % what_desc) return except Exception as e: + exc = e if what_desc: display.debug("win_reboot: %s fail (expected), retrying in %d seconds..." 
% (what_desc, fail_sleep)) time.sleep(fail_sleep) - raise TimedOutException("timed out waiting for %s: %s" % (what_desc, e)) + raise TimedOutException("timed out waiting for %s: %s" % (what_desc, exc)) def run(self, tmp=None, task_vars=None): @@ -66,25 +77,25 @@ class ActionModule(ActionBase): if result.get('skipped', False) or result.get('failed', False): return result - winrm_host = self._connection._winrm_host - winrm_port = self._connection._winrm_port - # Handle timeout parameters and its alias - if self._task.args.get('shutdown_timeout') is not None: - shutdown_timeout = int(self._task.args.get('shutdown_timeout', self.DEFAULT_SHUTDOWN_TIMEOUT)) - else: - shutdown_timeout = int(self._task.args.get('shutdown_timeout_sec', self.DEFAULT_SHUTDOWN_TIMEOUT)) - - if self._task.args.get('reboot_timeout') is not None: - reboot_timeout = int(self._task.args.get('reboot_timeout', self.DEFAULT_REBOOT_TIMEOUT)) - else: - reboot_timeout = int(self._task.args.get('reboot_timeout_sec', self.DEFAULT_REBOOT_TIMEOUT)) + deprecated_args = { + 'shutdown_timeout': '2.5', + 'shutdown_timeout_sec': '2.5', + } + for arg, version in deprecated_args.items(): + if self._task.args.get(arg) is not None: + display.warning("Since Ansible %s, %s is no longer used with win_reboot" % (version, arg)) if self._task.args.get('connect_timeout') is not None: connect_timeout = int(self._task.args.get('connect_timeout', self.DEFAULT_CONNECT_TIMEOUT)) else: connect_timeout = int(self._task.args.get('connect_timeout_sec', self.DEFAULT_CONNECT_TIMEOUT)) + if self._task.args.get('reboot_timeout') is not None: + reboot_timeout = int(self._task.args.get('reboot_timeout', self.DEFAULT_REBOOT_TIMEOUT)) + else: + reboot_timeout = int(self._task.args.get('reboot_timeout_sec', self.DEFAULT_REBOOT_TIMEOUT)) + if self._task.args.get('pre_reboot_delay') is not None: pre_reboot_delay = int(self._task.args.get('pre_reboot_delay', self.DEFAULT_PRE_REBOOT_DELAY)) else: @@ -98,7 +109,17 @@ class
ActionModule(ActionBase): test_command = str(self._task.args.get('test_command', self.DEFAULT_TEST_COMMAND)) msg = str(self._task.args.get('msg', self.DEFAULT_REBOOT_MESSAGE)) + # Get current uptime + try: + before_uptime = self.get_system_uptime() + except Exception as e: + result['failed'] = True + result['rebooted'] = False + result['msg'] = to_native(e) + return result + # Initiate reboot + display.vvv("rebooting server") (rc, stdout, stderr) = self._connection.exec_command('shutdown /r /t %d /c "%s"' % (pre_reboot_delay, msg)) # Test for "A system shutdown has already been scheduled. (1190)" and handle it gracefully @@ -119,40 +140,52 @@ class ActionModule(ActionBase): result['msg'] = "Shutdown command failed, error text was %s" % stderr return result - def raise_if_port_open(): - try: - sock = socket.create_connection((winrm_host, winrm_port), connect_timeout) - sock.close() - except: - return False - - raise Exception("port is open") - start = datetime.now() + # Get the original connection_timeout option var so it can be reset after + connection_timeout_orig = None + try: + connection_timeout_orig = self._connection.get_option('connection_timeout') + except AnsibleError: + display.debug("win_reboot: connection_timeout connection option has not been set") try: - self.do_until_success_or_timeout(raise_if_port_open, shutdown_timeout, what_desc="winrm port down") + # keep on checking system uptime with short connection responses + def check_uptime(): + display.vvv("attempting to get system uptime") - def connect_winrm_port(): - sock = socket.create_connection((winrm_host, winrm_port), connect_timeout) - sock.close() - - self.do_until_success_or_timeout(connect_winrm_port, reboot_timeout, what_desc="winrm port up") - - def run_test_command(): - display.vvv("attempting post-reboot test command '%s'" % test_command) - # call connection reset between runs if it's there + # override connection timeout from defaults to custom value try: +
self._connection.set_options(direct={"connection_timeout": connect_timeout}) self._connection._reset() except AttributeError: - pass + display.warning("Connection plugin does not allow the connection timeout to be overridden") + # try and get uptime + try: + current_uptime = self.get_system_uptime() + except Exception as e: + raise e + + if current_uptime == before_uptime: + raise Exception("uptime has not changed") + + self.do_until_success_or_timeout(check_uptime, reboot_timeout, what_desc="reboot uptime check success") + + # reset the connection to clear the custom connection timeout + try: + self._connection.set_options(direct={"connection_timeout": connection_timeout_orig}) + self._connection._reset() + except (AnsibleError, AttributeError): + display.debug("Failed to reset connection_timeout back to default") + + # finally run test command to ensure everything is working + def run_test_command(): + display.vvv("attempting post-reboot test command '%s'" % test_command) (rc, stdout, stderr) = self._connection.exec_command(test_command) if rc != 0: raise Exception('test command failed') - # FUTURE: ensure that a reboot has actually occurred by watching for change in last boot time fact # FUTURE: add a stability check (system must remain up for N seconds) to deal with self-multi-reboot updates self.do_until_success_or_timeout(run_test_command, reboot_timeout, what_desc="post-reboot test command success") @@ -163,7 +196,7 @@ class ActionModule(ActionBase): except TimedOutException as toex: result['failed'] = True result['rebooted'] = True - result['msg'] = toex.message + result['msg'] = to_native(toex) if post_reboot_delay != 0: display.vvv("win_reboot: waiting an additional %d seconds" % post_reboot_delay) diff --git a/lib/ansible/plugins/connection/winrm.py b/lib/ansible/plugins/connection/winrm.py index 81df18e4c9..d36fa5d32c 100644 --- a/lib/ansible/plugins/connection/winrm.py +++ b/lib/ansible/plugins/connection/winrm.py @@ -78,6 +78,17 @@ DOCUMENTATION = """ 
choices: [managed, manual] vars: - name: ansible_winrm_kinit_mode + connection_timeout: + description: + - Sets the operation and read timeout settings for the WinRM + connection. + - Corresponds to the C(operation_timeout_sec) and + C(read_timeout_sec) args in pywinrm so avoid setting these vars + with this one. + - The default value is whatever is set in the installed version of + pywinrm. + vars: + - name: ansible_winrm_connection_timeout """ import base64 @@ -170,6 +181,7 @@ class Connection(ConnectionBase): self._winrm_path = self._options['path'] self._kinit_cmd = self._options['kerberos_command'] self._winrm_transport = self._options['transport'] + self._winrm_connection_timeout = self._options['connection_timeout'] if hasattr(winrm, 'FEATURE_SUPPORTED_AUTHTYPES'): self._winrm_supported_authtypes = set(winrm.FEATURE_SUPPORTED_AUTHTYPES) @@ -261,7 +273,11 @@ class Connection(ConnectionBase): self._kerb_auth(self._winrm_user, self._winrm_pass) display.vvvvv('WINRM CONNECT: transport=%s endpoint=%s' % (transport, endpoint), host=self._winrm_host) try: - protocol = Protocol(endpoint, transport=transport, **self._winrm_kwargs) + winrm_kwargs = self._winrm_kwargs.copy() + if self._winrm_connection_timeout: + winrm_kwargs['operation_timeout_sec'] = self._winrm_connection_timeout + winrm_kwargs['read_timeout_sec'] = self._winrm_connection_timeout + 1 + protocol = Protocol(endpoint, transport=transport, **winrm_kwargs) # open the shell from connect so we know we're able to talk to the server if not self.shell_id: