From 12c252653bf17b0d70d1099500093db64249d0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Mon, 9 Feb 2026 21:56:32 +0100 Subject: [PATCH] ansible: add reboot playbook and short hostname support - Add reboot.yml playbook with rolling reboot (serial: 1) - Uses systemd reboot.target for NixOS compatibility - Waits for each host to come back before proceeding - Update dynamic inventory to use short hostnames - ansible_host set to FQDN for connections - Allows -l testvm01 instead of -l testvm01.home.2rjus.net - Update static.yml to match short hostname convention Co-Authored-By: Claude Opus 4.5 --- ansible/README.md | 4 +++ ansible/inventory/dynamic_flake.py | 14 +++++---- ansible/inventory/static.yml | 5 +++- ansible/playbooks/reboot.yml | 48 ++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 ansible/playbooks/reboot.yml diff --git a/ansible/README.md b/ansible/README.md index c36e30d..377c4ba 100644 --- a/ansible/README.md +++ b/ansible/README.md @@ -64,6 +64,7 @@ Non-flake hosts are defined in `inventory/static.yml`: |----------|-------------|---------| | `run-upgrade.yml` | Trigger nixos-upgrade on hosts | `-l tier_prod` | | `restart-service.yml` | Restart a systemd service | `-l role_dns -e service=unbound` | +| `reboot.yml` | Rolling reboot (one host at a time) | `-l tier_test` | | `provision-approle.yml` | Deploy Vault credentials (single host only) | `-l testvm01` | | `build-and-deploy-template.yml` | Build and deploy Proxmox template | (no limit needed) | @@ -82,6 +83,9 @@ nix develop -c ansible-playbook ansible/playbooks/provision-approle.yml -l testv # Build and deploy Proxmox template nix develop -c ansible-playbook ansible/playbooks/build-and-deploy-template.yml + +# Rolling reboot of test hosts (one at a time, waits for each to come back) +nix develop -c ansible-playbook ansible/playbooks/reboot.yml -l tier_test ``` ## Excluding Flake Hosts diff --git 
a/ansible/inventory/dynamic_flake.py b/ansible/inventory/dynamic_flake.py index b34c50d..6d4b352 100755 --- a/ansible/inventory/dynamic_flake.py +++ b/ansible/inventory/dynamic_flake.py @@ -102,28 +102,32 @@ def build_inventory(hosts_data: dict) -> dict: fqdn = f"{hostname}.{domain}" + # Use short hostname as inventory name, FQDN for connection + inventory_name = hostname + # Add to flake_hosts group - inventory["flake_hosts"]["hosts"].append(fqdn) + inventory["flake_hosts"]["hosts"].append(inventory_name) # Add host variables - inventory["_meta"]["hostvars"][fqdn] = { + inventory["_meta"]["hostvars"][inventory_name] = { + "ansible_host": fqdn, # Connect using FQDN + "fqdn": fqdn, "tier": tier, "role": role, - "short_hostname": hostname, } # Group by tier tier_group = f"tier_{sanitize_group_name(tier)}" if tier_group not in tier_groups: tier_groups[tier_group] = [] - tier_groups[tier_group].append(fqdn) + tier_groups[tier_group].append(inventory_name) # Group by role (if set) if role: role_group = f"role_{sanitize_group_name(role)}" if role_group not in role_groups: role_groups[role_group] = [] - role_groups[role_group].append(fqdn) + role_groups[role_group].append(inventory_name) # Add tier groups to inventory for group_name, hosts in tier_groups.items(): diff --git a/ansible/inventory/static.yml b/ansible/inventory/static.yml index 03d6e66..a28f314 100644 --- a/ansible/inventory/static.yml +++ b/ansible/inventory/static.yml @@ -2,9 +2,12 @@ # # Hosts defined here are merged with the dynamic flake inventory. # Use this for infrastructure that isn't managed by NixOS. +# +# Use short hostnames as inventory names with ansible_host for FQDN. 
all: children: proxmox: hosts: - pve1.home.2rjus.net: + pve1: + ansible_host: pve1.home.2rjus.net diff --git a/ansible/playbooks/reboot.yml b/ansible/playbooks/reboot.yml new file mode 100644 index 0000000..916b462 --- /dev/null +++ b/ansible/playbooks/reboot.yml @@ -0,0 +1,48 @@ +--- +# Reboot hosts with rolling strategy to avoid taking down redundant services +# +# Usage examples: +# # Reboot a single host +# ansible-playbook reboot.yml -l testvm01 +# +# # Reboot all test hosts (one at a time) +# ansible-playbook reboot.yml -l tier_test +# +# # Reboot all DNS servers safely (one at a time) +# ansible-playbook reboot.yml -l role_dns +# +# Safety features: +# - serial: 1 ensures only one host reboots at a time +# - Waits for host to come back online before proceeding +# - order: shuffle randomizes host order, making back-to-back reboots of same-role hosts less likely (not guaranteed) + +- name: Reboot hosts (rolling) + hosts: all + serial: 1 + order: shuffle # Randomize to spread out same-role hosts + gather_facts: false + + vars: + reboot_timeout: 300 # 5 minutes to wait for host to come back + + tasks: + - name: Display reboot target + ansible.builtin.debug: + msg: "Rebooting {{ inventory_hostname }} (role: {{ role | default('none') }})" + + - name: Reboot the host + ansible.builtin.systemd: + name: reboot.target + state: started + async: 1 + poll: 0 + ignore_errors: true + + - name: Wait for host to come back online + ansible.builtin.wait_for_connection: + delay: 5 + timeout: "{{ reboot_timeout }}" + + - name: Display reboot result + ansible.builtin.debug: + msg: "{{ inventory_hostname }} rebooted successfully"