ansible: add reboot playbook and short hostname support
- Add reboot.yml playbook with rolling reboot (serial: 1)
- Uses systemd reboot.target for NixOS compatibility
- Waits for each host to come back before proceeding
- Update dynamic inventory to use short hostnames
- ansible_host set to FQDN for connections
- Allows -l testvm01 instead of -l testvm01.home.2rjus.net
- Update static.yml to match short hostname convention

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -64,6 +64,7 @@ Non-flake hosts are defined in `inventory/static.yml`:
|
|||||||
|----------|-------------|---------|
|
|----------|-------------|---------|
|
||||||
| `run-upgrade.yml` | Trigger nixos-upgrade on hosts | `-l tier_prod` |
|
| `run-upgrade.yml` | Trigger nixos-upgrade on hosts | `-l tier_prod` |
|
||||||
| `restart-service.yml` | Restart a systemd service | `-l role_dns -e service=unbound` |
|
| `restart-service.yml` | Restart a systemd service | `-l role_dns -e service=unbound` |
|
||||||
|
| `reboot.yml` | Rolling reboot (one host at a time) | `-l tier_test` |
|
||||||
| `provision-approle.yml` | Deploy Vault credentials (single host only) | `-l testvm01` |
|
| `provision-approle.yml` | Deploy Vault credentials (single host only) | `-l testvm01` |
|
||||||
| `build-and-deploy-template.yml` | Build and deploy Proxmox template | (no limit needed) |
|
| `build-and-deploy-template.yml` | Build and deploy Proxmox template | (no limit needed) |
|
||||||
|
|
||||||
@@ -82,6 +83,9 @@ nix develop -c ansible-playbook ansible/playbooks/provision-approle.yml -l testv
|
|||||||
|
|
||||||
# Build and deploy Proxmox template
|
# Build and deploy Proxmox template
|
||||||
nix develop -c ansible-playbook ansible/playbooks/build-and-deploy-template.yml
|
nix develop -c ansible-playbook ansible/playbooks/build-and-deploy-template.yml
|
||||||
|
|
||||||
|
# Rolling reboot of test hosts (one at a time, waits for each to come back)
|
||||||
|
nix develop -c ansible-playbook ansible/playbooks/reboot.yml -l tier_test
|
||||||
```
|
```
|
||||||
|
|
||||||
## Excluding Flake Hosts
|
## Excluding Flake Hosts
|
||||||
|
|||||||
@@ -102,28 +102,32 @@ def build_inventory(hosts_data: dict) -> dict:
|
|||||||
|
|
||||||
fqdn = f"{hostname}.{domain}"
|
fqdn = f"{hostname}.{domain}"
|
||||||
|
|
||||||
|
# Use short hostname as inventory name, FQDN for connection
|
||||||
|
inventory_name = hostname
|
||||||
|
|
||||||
# Add to flake_hosts group
|
# Add to flake_hosts group
|
||||||
inventory["flake_hosts"]["hosts"].append(fqdn)
|
inventory["flake_hosts"]["hosts"].append(inventory_name)
|
||||||
|
|
||||||
# Add host variables
|
# Add host variables
|
||||||
inventory["_meta"]["hostvars"][fqdn] = {
|
inventory["_meta"]["hostvars"][inventory_name] = {
|
||||||
|
"ansible_host": fqdn, # Connect using FQDN
|
||||||
|
"fqdn": fqdn,
|
||||||
"tier": tier,
|
"tier": tier,
|
||||||
"role": role,
|
"role": role,
|
||||||
"short_hostname": hostname,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Group by tier
|
# Group by tier
|
||||||
tier_group = f"tier_{sanitize_group_name(tier)}"
|
tier_group = f"tier_{sanitize_group_name(tier)}"
|
||||||
if tier_group not in tier_groups:
|
if tier_group not in tier_groups:
|
||||||
tier_groups[tier_group] = []
|
tier_groups[tier_group] = []
|
||||||
tier_groups[tier_group].append(fqdn)
|
tier_groups[tier_group].append(inventory_name)
|
||||||
|
|
||||||
# Group by role (if set)
|
# Group by role (if set)
|
||||||
if role:
|
if role:
|
||||||
role_group = f"role_{sanitize_group_name(role)}"
|
role_group = f"role_{sanitize_group_name(role)}"
|
||||||
if role_group not in role_groups:
|
if role_group not in role_groups:
|
||||||
role_groups[role_group] = []
|
role_groups[role_group] = []
|
||||||
role_groups[role_group].append(fqdn)
|
role_groups[role_group].append(inventory_name)
|
||||||
|
|
||||||
# Add tier groups to inventory
|
# Add tier groups to inventory
|
||||||
for group_name, hosts in tier_groups.items():
|
for group_name, hosts in tier_groups.items():
|
||||||
|
|||||||
@@ -2,9 +2,12 @@
|
|||||||
#
|
#
|
||||||
# Hosts defined here are merged with the dynamic flake inventory.
|
# Hosts defined here are merged with the dynamic flake inventory.
|
||||||
# Use this for infrastructure that isn't managed by NixOS.
|
# Use this for infrastructure that isn't managed by NixOS.
|
||||||
|
#
|
||||||
|
# Use short hostnames as inventory names and set ansible_host to the FQDN.
|
||||||
|
|
||||||
all:
|
all:
|
||||||
children:
|
children:
|
||||||
proxmox:
|
proxmox:
|
||||||
hosts:
|
hosts:
|
||||||
pve1.home.2rjus.net:
|
pve1:
|
||||||
|
ansible_host: pve1.home.2rjus.net
|
||||||
|
|||||||
48
ansible/playbooks/reboot.yml
Normal file
48
ansible/playbooks/reboot.yml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
---
|
||||||
|
# Reboot hosts with rolling strategy to avoid taking down redundant services
|
||||||
|
#
|
||||||
|
# Usage examples:
|
||||||
|
# # Reboot a single host
|
||||||
|
# ansible-playbook reboot.yml -l testvm01
|
||||||
|
#
|
||||||
|
# # Reboot all test hosts (one at a time)
|
||||||
|
# ansible-playbook reboot.yml -l tier_test
|
||||||
|
#
|
||||||
|
# # Reboot all DNS servers safely (one at a time)
|
||||||
|
# ansible-playbook reboot.yml -l role_dns
|
||||||
|
#
|
||||||
|
# Safety features:
|
||||||
|
# - serial: 1 ensures only one host reboots at a time
|
||||||
|
# - Waits for host to come back online before proceeding
|
||||||
|
# - order: shuffle randomizes host order, making back-to-back reboots of same-role hosts less likely (not guaranteed)
|
||||||
|
|
||||||
|
- name: Reboot hosts (rolling)
|
||||||
|
hosts: all
|
||||||
|
serial: 1
|
||||||
|
order: shuffle # Randomize to spread out same-role hosts
|
||||||
|
gather_facts: false
|
||||||
|
|
||||||
|
vars:
|
||||||
|
reboot_timeout: 300 # 5 minutes to wait for host to come back
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: Display reboot target
|
||||||
|
ansible.builtin.debug:
|
||||||
|
msg: "Rebooting {{ inventory_hostname }} (role: {{ role | default('none') }})"
|
||||||
|
|
||||||
|
- name: Reboot the host
|
||||||
|
ansible.builtin.systemd:
|
||||||
|
name: reboot.target
|
||||||
|
state: started
|
||||||
|
async: 1
|
||||||
|
poll: 0
|
||||||
|
ignore_errors: true
|
||||||
|
|
||||||
|
- name: Wait for host to come back online
|
||||||
|
ansible.builtin.wait_for_connection:
|
||||||
|
delay: 5
|
||||||
|
timeout: "{{ reboot_timeout }}"
|
||||||
|
|
||||||
|
- name: Display reboot result
|
||||||
|
ansible.builtin.debug:
|
||||||
|
msg: "{{ inventory_hostname }} rebooted successfully"
|
||||||
Reference in New Issue
Block a user