#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2024, IamLunchbox
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import absolute_import, division, print_function
__metaclass__ = type

DOCUMENTATION = r"""
module: proxmox_backup
author: "Raphael Grieger (@IamLunchbox)"
short_description: Start a VM backup in a Proxmox VE cluster
version_added: 10.1.0
description:
  - Allows you to create backups of KVM and LXC guests in a Proxmox VE cluster.
  - Offers the GUI functionality of creating a single backup as well as using the run-now functionality from the cluster backup schedule.
  - The minimum required privileges to use this module are C(VM.Backup) and C(Datastore.AllocateSpace) for the respective VMs and storage.
  - Most options are optional; if unspecified, the cluster chooses its default values.
  - Note that this module B(is not idempotent). It always starts a new backup (when not in check mode).
attributes:
  check_mode:
    support: full
  diff_mode:
    support: none
options:
  backup_mode:
    description:
      - The mode in which Proxmox performs backups. The default is to create a runtime snapshot including memory.
      - Check U(https://pve.proxmox.com/pve-docs/chapter-vzdump.html#_backup_modes) for an explanation of the differences.
    type: str
    choices: ["snapshot", "suspend", "stop"]
    default: snapshot
  bandwidth:
    description:
      - Limit the I/O bandwidth (in KiB/s) used to write the backup. V(0) is unlimited.
    type: int
  change_detection_mode:
    description:
      - Set the change detection mode (available from Proxmox VE 8.3).
      - It is only used when backing up containers; Proxmox silently ignores this option when applied to KVM guests.
    type: str
    choices: ["legacy", "data", "metadata"]
  compress:
    description:
      - Enable additional compression of the backup archive.
      - V(0) will use the Proxmox recommended value, depending on your storage target.
    type: str
    choices: ["0", "1", "gzip", "lzo", "zstd"]
  compression_threads:
    description:
      - The number of threads zstd will use to compress the backup.
      - V(0) uses 50% of the available cores; any value larger than V(0) uses exactly that many threads.
      - Is ignored if you specify O(compress=gzip) or O(compress=lzo).
    type: int
  description:
    description:
      - Specify the description of the backup.
      - Needs to be a single line, newline and backslash need to be escaped as V(\\n) and V(\\\\) respectively.
      - If you need variable interpolation, you can set the content as usual through Ansible Jinja templating and/or let Proxmox substitute templates.
      - Proxmox currently supports V({{cluster}}), V({{guestname}}), V({{node}}), and V({{vmid}}) as templating variables.
        Since these are also Jinja delimiters, you need to set these values as raw Jinja.
    default: "{{guestname}}"
    type: str
  fleecing:
    description:
      - Enable backup fleecing. Works only for virtual machines and their disks.
      - Must be entered as a string, containing key-value pairs in a list.
    type: str
  mode:
    description:
      - Specifies the mode used to select backup targets.
    choices: ["include", "all", "pool"]
    required: true
    type: str
  node:
    description:
      - Only execute the backup job for the given node.
      - This option is usually used if O(mode=all).
      - If you specify a node ID and your vmids or pool do not reside there, they will not be backed up!
    type: str
  notification_mode:
    description:
      - Determine which notification system to use.
    type: str
    choices: ["auto", "legacy-sendmail", "notification-system"]
    default: auto
  performance_tweaks:
    description:
      - Enable other performance-related settings.
      - Must be entered as a string, containing comma separated key-value pairs.
      - 'For example: V(max-workers=2,pbs-entries-max=2).'
    type: str
  pool:
    description:
      - Specify a pool name to limit backups to guests assigned to the given pool.
      - Required when O(mode=pool).
      - Also required when your user only has VM.Backup permission for this single pool.
    type: str
  protected:
    description:
      - Marks backups as protected.
      - 'Might fail when the PBS backend has verify enabled, due to this bug: U(https://bugzilla.proxmox.com/show_bug.cgi?id=4289).'
    type: bool
  retention:
    description:
      - Use custom retention options instead of those from the default cluster configuration (which is usually V("keep-all=1")).
      - Always requires Datastore.Allocate permission at the storage endpoint.
      - Specifying a retention time other than V(keep-all=1) might trigger pruning on the datastore, if an existing backup needs to be deleted
        due to your specified timeframe.
      - Deleting requires C(Datastore.Modify) or C(Datastore.Prune) permissions on the backup storage.
    type: str
  storage:
    description:
      - Store the backup archive on this storage.
    type: str
    required: true
  vmids:
    description:
      - The instance IDs to be backed up.
      - Only valid if O(mode=include).
    type: list
    elements: int
  wait:
    description:
      - Wait for the backup to be finished.
      - Fails if the job does not succeed within the given timeout.
    type: bool
    default: false
  wait_timeout:
    description:
      - Seconds to wait for the backup to be finished.
      - Only evaluated if O(wait=true).
    type: int
    default: 10
requirements: ["proxmoxer", "requests"]
extends_documentation_fragment:
  - community.general.proxmox.actiongroup_proxmox
  - community.general.proxmox.documentation
  - community.general.attributes
"""

EXAMPLES = r"""
- name: Backup all VMs in the Proxmox cluster to storage mypbs
  community.general.proxmox_backup:
    api_user: root@pam
    api_password: secret
    api_host: node1
    storage: mypbs
    mode: all

- name: Backup VMID 100 by stopping it and set an individual retention
  community.general.proxmox_backup:
    api_user: root@pam
    api_password: secret
    api_host: node1
    backup_mode: stop
    mode: include
    retention: keep-daily=5, keep-last=14, keep-monthly=4, keep-weekly=4, keep-yearly=0
    storage: mypbs
    vmids: [100]

- name: Backup all VMs on node node2 to storage mypbs and wait for the task to finish
  community.general.proxmox_backup:
    api_user: test@pve
    api_password: 1q2w3e
    api_host: node2
    storage: mypbs
    mode: all
    node: node2
    wait: true
    wait_timeout: 30

- name: Use all the options
  community.general.proxmox_backup:
    api_user: root@pam
    api_password: secret
    api_host: node1
    bandwidth: 1000
    backup_mode: suspend
    compress: zstd
    compression_threads: 0
    description: A single backup for {% raw %}{{ guestname }}{% endraw %}
    mode: include
    notification_mode: notification-system
    protected: true
    retention: keep-monthly=1, keep-weekly=1
    storage: mypbs
    vmids:
      - 100
      - 101
"""

RETURN = r"""
backups:
  description: List of nodes and their task IDs.
  returned: on success
  type: list
  elements: dict
  contains:
    node:
      description: Node ID.
      returned: on success
      type: str
    status:
      description: Last known task status. Will be V(unknown) if O(wait=false).
      returned: on success
      type: str
      choices: ["unknown", "success", "failed"]
    upid:
      description: >-
        Proxmox cluster UPID, which is needed to look up task info.
        Returns V(OK) when a cluster node did not create a task after being called, for example due to no matching targets.
      returned: on success
      type: str
"""

import time

from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.common.text.converters import to_native

from ansible_collections.community.general.plugins.module_utils.proxmox import ProxmoxAnsible, proxmox_auth_argument_spec


def has_permission(permission_tree, permission, search_scopes, default=0, expected=1):
    return any(permission_tree.get(scope, {}).get(permission, default) == expected for scope in search_scopes)


class ProxmoxBackupAnsible(ProxmoxAnsible):

    def _get_permissions(self):
        return self.proxmox_api.access.permissions.get()

    def _get_resources(self, resource_type=None):
        return self.proxmox_api.cluster.resources.get(type=resource_type)

    def _get_tasklog(self, node, upid):
        return self.proxmox_api.nodes(node).tasks(upid).log.get()

    def _get_taskok(self, node, upid):
        return self.proxmox_api.nodes(node).tasks(upid).status.get()

    def _post_vzdump(self, node, request_body):
        return self.proxmox_api.nodes(node).vzdump.post(**request_body)

    def request_backup(self, request_body, node_endpoints):
        task_ids = []
        for node in node_endpoints:
            upid = self._post_vzdump(node, request_body)
            if upid != "OK":
                tasklog = ", ".join(logentry["t"] for logentry in self._get_tasklog(node, upid))
            else:
                tasklog = ""
            task_ids.append({"node": node, "upid": upid, "status": "unknown", "log": tasklog})
        return task_ids

    def check_relevant_nodes(self, node):
        nodes = [
            item["node"]
            for item in self._get_resources("node")
            if item["status"] == "online"
        ]
        if node and node not in nodes:
            self.module.fail_json(msg="Node %s was specified, but does not exist on the cluster" % node)
        elif node:
            return [node]
        return nodes

    def check_storage_permissions(self, permissions, storage, bandwidth, performance, retention):
        # Check for Datastore.AllocateSpace in the permission tree
        if not has_permission(permissions, "Datastore.AllocateSpace", search_scopes=["/", "/storage/", "/storage/" + storage]):
            self.module.fail_json(changed=False, msg="Insufficient permission: Datastore.AllocateSpace is missing")

        if (bandwidth or performance) and has_permission(permissions, "Sys.Modify", search_scopes=["/"], expected=0):
            self.module.fail_json(
                changed=False,
                msg="Insufficient permission: Performance_tweaks and bandwidth require 'Sys.Modify' permission for '/'")

        if retention:
            if not has_permission(permissions, "Datastore.Allocate", search_scopes=["/", "/storage", "/storage/" + storage]):
                self.module.fail_json(
                    changed=False,
                    msg="Insufficient permissions: Custom retention was requested, but Datastore.Allocate is missing")

    def check_vmid_backup_permission(self, permissions, vmids, pool):
        sufficient_permissions = has_permission(permissions, "VM.Backup", search_scopes=["/", "/vms"])
        if pool and not sufficient_permissions:
            sufficient_permissions = has_permission(permissions, "VM.Backup", search_scopes=["/pool/" + pool, "/pool/" + pool + "/vms"])

        if not sufficient_permissions:
            # Since VM.Backup can be granted per vmid, iterate through all of them
            # and check if the permission is set
            failed_vmids = []
            for vm in vmids:
                vm_path = "/vms/" + str(vm)
                if has_permission(permissions, "VM.Backup", search_scopes=[vm_path], default=1, expected=0):
                    failed_vmids.append(str(vm))
            if failed_vmids:
                self.module.fail_json(
                    changed=False,
                    msg="Insufficient permissions: You do not have the VM.Backup permission for VMID %s" % ", ".join(failed_vmids))
            sufficient_permissions = True
        # Finally, when no check succeeded, fail
        if not sufficient_permissions:
            self.module.fail_json(changed=False, msg="Insufficient permissions: You do not have the VM.Backup permission")

    def check_general_backup_permission(self, permissions, pool):
        if not has_permission(permissions, "VM.Backup", search_scopes=["/", "/vms"] + (["/pool/" + pool] if pool else [])):
            self.module.fail_json(changed=False, msg="Insufficient permissions: You do not have the VM.Backup permission")

    def check_if_storage_exists(self, storage, node):
        storages = self.get_storages(type=None)
        # Loop through all cluster storages and get all matching storages
        validated_storagepath = [storageentry for storageentry in storages if storageentry["storage"] == storage]
        if not validated_storagepath:
            self.module.fail_json(
                changed=False,
                msg="Storage %s does not exist in the cluster" % storage)

    def check_vmids(self, vmids):
        cluster_vmids = [vm["vmid"] for vm in self._get_resources("vm")]
        if not cluster_vmids:
            self.module.warn(
                "VM.Audit permission is missing or there are no VMs. This task might fail if one VMID does not exist")
            return
        vmids_not_found = [str(vm) for vm in vmids if vm not in cluster_vmids]
        if vmids_not_found:
            self.module.warn(
                "VMIDs %s not found. This task will fail if one VMID does not exist" % ", ".join(vmids_not_found))

    def wait_for_timeout(self, timeout, raw_tasks):
        # Filter out all entries which did not get a task id from the cluster
        tasks = []
        ok_tasks = []
        for node in raw_tasks:
            if node["upid"] != "OK":
                tasks.append(node)
            else:
                ok_tasks.append(node)

        start_time = time.time()
        # Iterate through the task ids and check their values
        while True:
            for node in tasks:
                if node["status"] == "unknown":
                    try:
                        # proxmox.api_task_ok does not suffice, since it only
                        # is true at `stopped` and `ok`
                        status = self._get_taskok(node["node"], node["upid"])
                        if status["status"] == "stopped" and status["exitstatus"] == "OK":
                            node["status"] = "success"
                        if status["status"] == "stopped" and status["exitstatus"] == "job errors":
                            node["status"] = "failed"
                    except Exception as e:
                        self.module.fail_json(msg="Unable to retrieve API task ID from node %s: %s" % (node["node"], e))
            if len([item for item in tasks if item["status"] != "unknown"]) == len(tasks):
                break
            if time.time() > start_time + timeout:
                timeouted_nodes = [
                    node["node"]
                    for node in tasks
                    if node["status"] == "unknown"
                ]
                failed_nodes = [node["node"] for node in tasks if node["status"] == "failed"]
                if failed_nodes:
                    self.module.fail_json(
                        msg="Reached timeout while waiting for backup task. "
                        "Nodes that reached the timeout: %s. "
                        "Nodes that failed: %s" % (", ".join(timeouted_nodes), ", ".join(failed_nodes)))
                self.module.fail_json(
                    msg="Reached timeout while waiting for the backup task. "
                    "Nodes that reached the timeout: %s" % ", ".join(timeouted_nodes))
            time.sleep(1)

        error_logs = []
        for node in tasks:
            if node["status"] == "failed":
                tasklog = ", ".join([logentry["t"] for logentry in self._get_tasklog(node["node"], node["upid"])])
                error_logs.append("%s: %s" % (node, tasklog))
        if error_logs:
            self.module.fail_json(
                msg="An error occurred creating the backups. "
" "These are the last log lines from the failed nodes: %s" % ", ".join(error_logs)) for node in tasks: tasklog = ", ".join([logentry["t"] for logentry in self._get_tasklog(node["node"], node["upid"])]) node["log"] = tasklog # Finally, reattach ok tasks to show, that all nodes were contacted tasks.extend(ok_tasks) return tasks def permission_check( self, storage, mode, node, bandwidth, performance_tweaks, retention, pool, vmids): permissions = self._get_permissions() self.check_if_storage_exists(storage, node) self.check_storage_permissions( permissions, storage, bandwidth, performance_tweaks, retention) if mode == "include": self.check_vmid_backup_permission(permissions, vmids, pool) else: self.check_general_backup_permission(permissions, pool) def prepare_request_parameters(self, module_arguments): # ensure only valid post parameters are passed to proxmox # list of dict items to replace with (new_val, old_val) post_params = [("bwlimit", "bandwidth"), ("compress", "compress"), ("fleecing", "fleecing"), ("mode", "backup_mode"), ("notes-template", "description"), ("notification-mode", "notification_mode"), ("pbs-change-detection-mode", "change_detection_mode"), ("performance", "performance_tweaks"), ("pool", "pool"), ("protected", "protected"), ("prune-backups", "retention"), ("storage", "storage"), ("zstd", "compression_threads"), ("vmid", "vmids")] request_body = {} for new, old in post_params: if module_arguments.get(old): request_body.update({new: module_arguments[old]}) # Set mode specific values if module_arguments["mode"] == "include": request_body.pop("pool", None) request_body["all"] = 0 elif module_arguments["mode"] == "all": request_body.pop("vmid", None) request_body.pop("pool", None) request_body["all"] = 1 elif module_arguments["mode"] == "pool": request_body.pop("vmid", None) request_body["all"] = 0 # Create comma separated list from vmids, the API expects so if request_body.get("vmid"): request_body.update({"vmid": ",".join(str(vmid) for vmid in request_body["vmid"])}) # remove whitespaces from option strings for key in ("prune-backups", "performance"): if request_body.get(key): request_body[key] = request_body[key].replace(" ", "") # convert booleans to 0/1 for key in ("protected",): if request_body.get(key): request_body[key] = 1 return request_body def backup_create( self, module_arguments, check_mode, node_endpoints): request_body = self.prepare_request_parameters(module_arguments) # stop here, before anything gets changed if check_mode: return [] task_ids = self.request_backup(request_body, node_endpoints) updated_task_ids = [] if module_arguments["wait"]: updated_task_ids = self.wait_for_timeout( module_arguments["wait_timeout"], task_ids) return updated_task_ids if updated_task_ids else task_ids def main(): module_args = proxmox_auth_argument_spec() backup_args = { "backup_mode": {"type": "str", "default": "snapshot", "choices": ["snapshot", "suspend", "stop"]}, "bandwidth": {"type": "int"}, "change_detection_mode": {"type": "str", "choices": ["legacy", "data", "metadata"]}, "compress": {"type": "str", "choices": ["0", "1", "gzip", "lzo", "zstd"]}, "compression_threads": {"type": "int"}, "description": {"type": "str", "default": "{{guestname}}"}, "fleecing": {"type": "str"}, "mode": {"type": "str", "required": True, "choices": ["include", "all", "pool"]}, "node": {"type": "str"}, "notification_mode": {"type": "str", "default": "auto", "choices": ["auto", "legacy-sendmail", "notification-system"]}, "performance_tweaks": {"type": "str"}, "pool": {"type": "str"}, 
"protected": {"type": "bool"}, "retention": {"type": "str"}, "storage": {"type": "str", "required": True}, "vmids": {"type": "list", "elements": "int"}, "wait": {"type": "bool", "default": False}, "wait_timeout": {"type": "int", "default": 10}} module_args.update(backup_args) module = AnsibleModule( argument_spec=module_args, supports_check_mode=True, required_if=[ ("mode", "include", ("vmids",), True), ("mode", "pool", ("pool",)) ] ) proxmox = ProxmoxBackupAnsible(module) bandwidth = module.params["bandwidth"] mode = module.params["mode"] node = module.params["node"] performance_tweaks = module.params["performance_tweaks"] pool = module.params["pool"] retention = module.params["retention"] storage = module.params["storage"] vmids = module.params["vmids"] proxmox.permission_check( storage, mode, node, bandwidth, performance_tweaks, retention, pool, vmids) if module.params["mode"] == "include": proxmox.check_vmids(module.params["vmids"]) node_endpoints = proxmox.check_relevant_nodes(module.params["node"]) try: result = proxmox.backup_create(module.params, module.check_mode, node_endpoints) except Exception as e: module.fail_json(msg="Creating backups failed with exception: %s" % to_native(e)) if module.check_mode: module.exit_json(backups=result, changed=True, msg="Backups would be created") elif len([entry for entry in result if entry["upid"] == "OK"]) == len(result): module.exit_json(backups=result, changed=False, msg="Backup request sent to proxmox, no tasks created") elif module.params["wait"]: module.exit_json(backups=result, changed=True, msg="Backups succeeded") else: module.exit_json(backups=result, changed=True, msg="Backup tasks created") if __name__ == "__main__": main()