From 83de9a3ffb86ff89023731c3714159d1bee948b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sun, 1 Feb 2026 16:34:28 +0100 Subject: [PATCH 1/7] pipeline: add testing improvements for branch-based workflows Implement dual improvements to enable efficient testing of pipeline changes without polluting master branch: 1. Add --force flag to create-host script - Skip hostname/IP uniqueness validation - Overwrite existing host configurations - Update entries in flake.nix and terraform/vms.tf (no duplicates) - Useful for iterating on configurations during testing 2. Add branch support to bootstrap mechanism - Bootstrap service reads NIXOS_FLAKE_BRANCH environment variable - Defaults to master if not set - Uses branch in git URL via ?ref= parameter - Service loads environment from /etc/environment 3. Add cloud-init disk support for branch configuration - VMs can specify flake_branch field in terraform/vms.tf - Automatically generates cloud-init snippet setting NIXOS_FLAKE_BRANCH - Uploads snippet to Proxmox via SSH - Production VMs omit flake_branch and use master 4. Update documentation - Document --force flag usage in create-host README - Add branch testing examples in terraform README - Update TODO.md with testing workflow - Add .generated/ to gitignore Testing workflow: Create feature branch, set flake_branch in VM definition, deploy with terraform, iterate with --force flag, clean up before merging. 
Co-Authored-By: Claude Sonnet 4.5 --- .gitignore | 1 + TODO.md | 84 +++++++++++++++++++++++++++-- hosts/template2/bootstrap.nix | 9 +++- scripts/create-host/README.md | 21 ++++++++ scripts/create-host/create_host.py | 23 +++++--- scripts/create-host/manipulators.py | 80 +++++++++++++++++---------- terraform/README.md | 18 +++++++ terraform/cloud-init.tf | 55 +++++++++++++++++++ terraform/variables.tf | 6 +++ terraform/vms.tf | 11 ++++ 10 files changed, 268 insertions(+), 40 deletions(-) create mode 100644 terraform/cloud-init.tf diff --git a/.gitignore b/.gitignore index 8068363..11ce9bc 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ terraform/terraform.tfvars terraform/*.auto.tfvars terraform/crash.log terraform/crash.*.log +terraform/.generated/ diff --git a/TODO.md b/TODO.md index 70e81a5..5ee72c5 100644 --- a/TODO.md +++ b/TODO.md @@ -54,6 +54,7 @@ Automate the entire process of creating, configuring, and deploying new NixOS ho **Status:** ✅ Fully implemented and tested **Completed:** 2025-02-01 +**Enhanced:** 2025-02-01 (added --force flag) **Goal:** Automate creation of host configuration files @@ -64,6 +65,7 @@ Automate the entire process of creating, configuring, and deploying new NixOS ho - Comprehensive validation (hostname format/uniqueness, IP subnet/uniqueness) - Jinja2 templates for NixOS configurations - Automatic updates to flake.nix and terraform/vms.tf +- `--force` flag for regenerating existing configurations (useful for testing) **Tasks:** - [x] Create Python CLI with typer framework @@ -109,6 +111,7 @@ create-host \ **Status:** ✅ Fully implemented and tested **Completed:** 2025-02-01 +**Enhanced:** 2025-02-01 (added branch support for testing) **Goal:** Get freshly deployed VM to apply its specific host configuration @@ -118,7 +121,8 @@ create-host \ - Systemd service `nixos-bootstrap.service` runs on first boot - Depends on `cloud-config.service` to ensure hostname is set - Reads hostname from `hostnamectl` (set by cloud-init via 
Terraform) -- Runs `nixos-rebuild boot --flake git+https://git.t-juice.club/torjus/nixos-servers.git#${hostname}` +- Supports custom git branch via `NIXOS_FLAKE_BRANCH` environment variable +- Runs `nixos-rebuild boot --flake git+https://git.t-juice.club/torjus/nixos-servers.git?ref=$BRANCH#${hostname}` - Reboots into new configuration on success - Fails gracefully without reboot on errors (network issues, missing config) - Service self-destructs after successful bootstrap (not in new config) @@ -240,10 +244,80 @@ Since most hosts use static IPs defined in their NixOS configurations, we can ex ### Phase 7: Testing & Documentation -**Tasks:** -- [ ] Test full pipeline end-to-end -- [ ] Create test host and verify all steps -- [ ] Document the new workflow in CLAUDE.md +**Status:** 🚧 In Progress (testing improvements completed) + +**Testing Improvements Implemented (2025-02-01):** + +The pipeline now supports efficient testing without polluting master branch: + +**1. --force Flag for create-host** +- Re-run `create-host` to regenerate existing configurations +- Updates existing entries in flake.nix and terraform/vms.tf (no duplicates) +- Skip uniqueness validation checks +- Useful for iterating on configuration templates during testing + +**2. Branch Support for Bootstrap** +- Bootstrap service reads `NIXOS_FLAKE_BRANCH` environment variable +- Defaults to `master` if not set +- Allows testing pipeline changes on feature branches +- Cloud-init passes branch via `/etc/environment` + +**3. Cloud-init Disk for Branch Configuration** +- Terraform generates custom cloud-init snippets for test VMs +- Set `flake_branch` field in VM definition to use non-master branch +- Production VMs omit this field and use master (default) +- Files automatically uploaded to Proxmox via SSH + +**Testing Workflow:** + +```bash +# 1. Create test branch +git checkout -b test-pipeline + +# 2. Generate or update host config +create-host --hostname testvm01 --ip 10.69.13.100/24 + +# 3. 
Edit terraform/vms.tf to add test VM with branch +# vms = { +# "testvm01" = { +# ip = "10.69.13.100/24" +# flake_branch = "test-pipeline" # Bootstrap from this branch +# } +# } + +# 4. Commit and push test branch +git add -A && git commit -m "test: add testvm01" +git push origin test-pipeline + +# 5. Deploy VM +cd terraform && tofu apply + +# 6. Watch bootstrap (VM fetches from test-pipeline branch) +ssh root@10.69.13.100 +journalctl -fu nixos-bootstrap.service + +# 7. Iterate: modify templates and regenerate with --force +cd .. && create-host --hostname testvm01 --ip 10.69.13.100/24 --force +git commit -am "test: update config" && git push + +# Redeploy to test fresh bootstrap +cd terraform +tofu destroy -target=proxmox_vm_qemu.vm[\"testvm01\"] && tofu apply + +# 8. Clean up when done: squash commits, merge to master, remove test VM +``` + +**Files:** +- `scripts/create-host/create_host.py` - Added --force parameter +- `scripts/create-host/manipulators.py` - Update vs insert logic +- `hosts/template2/bootstrap.nix` - Branch support via environment variable +- `terraform/vms.tf` - flake_branch field support +- `terraform/cloud-init.tf` - Custom cloud-init disk generation +- `terraform/variables.tf` - proxmox_host variable for SSH uploads + +**Remaining Tasks:** +- [ ] Test full pipeline end-to-end on feature branch +- [ ] Update CLAUDE.md with testing workflow - [ ] Add troubleshooting section - [ ] Create examples for common scenarios (DHCP host, static IP host, etc.) diff --git a/hosts/template2/bootstrap.nix b/hosts/template2/bootstrap.nix index da660f0..bbbe8cc 100644 --- a/hosts/template2/bootstrap.nix +++ b/hosts/template2/bootstrap.nix @@ -24,8 +24,12 @@ let echo "Network connectivity confirmed" echo "Fetching and building NixOS configuration from flake..." 
+ # Read git branch from environment, default to master + BRANCH="''${NIXOS_FLAKE_BRANCH:-master}" + echo "Using git branch: $BRANCH" + # Build and activate the host-specific configuration - FLAKE_URL="git+https://git.t-juice.club/torjus/nixos-servers.git#''${HOSTNAME}" + FLAKE_URL="git+https://git.t-juice.club/torjus/nixos-servers.git?ref=$BRANCH#''${HOSTNAME}" if nixos-rebuild boot --flake "$FLAKE_URL"; then echo "Successfully built configuration for $HOSTNAME" @@ -58,6 +62,9 @@ in RemainAfterExit = true; ExecStart = "${bootstrap-script}/bin/nixos-bootstrap"; + # Read environment variables from /etc/environment (set by cloud-init) + EnvironmentFile = "-/etc/environment"; + # Logging to journald StandardOutput = "journal+console"; StandardError = "journal+console"; diff --git a/scripts/create-host/README.md b/scripts/create-host/README.md index 3169287..18c4b2d 100644 --- a/scripts/create-host/README.md +++ b/scripts/create-host/README.md @@ -50,6 +50,23 @@ python -m scripts.create_host.create_host create \ --dry-run ``` +### Force Mode (Regenerate Existing Configuration) + +Overwrite an existing host configuration (useful for testing): + +```bash +python -m scripts.create_host.create_host create \ + --hostname test01 \ + --ip 10.69.13.50/24 \ + --force +``` + +This mode: +- Skips hostname and IP uniqueness validation +- Overwrites files in `hosts/<hostname>/` +- Updates existing entries in `flake.nix` and `terraform/vms.tf` (doesn't duplicate) +- Useful for iterating on configuration templates during testing + ### Options - `--hostname` (required): Hostname for the new host @@ -73,6 +90,10 @@ python -m scripts.create_host.create_host create \ - `--dry-run` (flag): Preview changes without creating files +- `--force` (flag): Overwrite existing host configuration + - Skips uniqueness validation + - Updates existing entries instead of creating duplicates + ## What It Does The tool performs the following actions: diff --git a/scripts/create-host/create_host.py 
b/scripts/create-host/create_host.py index 7756444..22ad641 100644 --- a/scripts/create-host/create_host.py +++ b/scripts/create-host/create_host.py @@ -45,6 +45,7 @@ def main( memory: int = typer.Option(2048, "--memory", help="Memory in MB"), disk: str = typer.Option("20G", "--disk", help="Disk size (e.g., 20G, 50G, 100G)"), dry_run: bool = typer.Option(False, "--dry-run", help="Preview changes without creating files"), + force: bool = typer.Option(False, "--force", help="Overwrite existing host configuration"), ) -> None: """ Create a new NixOS host configuration. @@ -75,11 +76,20 @@ def main( config.validate() validate_hostname_format(hostname) - validate_hostname_unique(hostname, repo_root) + + # Skip uniqueness checks in force mode + if not force: + validate_hostname_unique(hostname, repo_root) + if ip: + validate_ip_unique(ip, repo_root) + else: + # Check if we're actually overwriting something + host_dir = repo_root / "hosts" / hostname + if host_dir.exists(): + console.print(f"[yellow]⚠[/yellow] Updating existing host configuration for {hostname}") if ip: validate_ip_subnet(ip) - validate_ip_unique(ip, repo_root) console.print("[green]✓[/green] All validations passed\n") @@ -96,13 +106,14 @@ def main( console.print("\n[bold blue]Generating host configuration...[/bold blue]") generate_host_files(config, repo_root) - console.print(f"[green]✓[/green] Created hosts/{hostname}/default.nix") - console.print(f"[green]✓[/green] Created hosts/{hostname}/configuration.nix") + action = "Updated" if force else "Created" + console.print(f"[green]✓[/green] {action} hosts/{hostname}/default.nix") + console.print(f"[green]✓[/green] {action} hosts/{hostname}/configuration.nix") - update_flake_nix(config, repo_root) + update_flake_nix(config, repo_root, force=force) console.print("[green]✓[/green] Updated flake.nix") - update_terraform_vms(config, repo_root) + update_terraform_vms(config, repo_root, force=force) console.print("[green]✓[/green] Updated terraform/vms.tf") # 
Success message diff --git a/scripts/create-host/manipulators.py b/scripts/create-host/manipulators.py index b0dca17..4d821cc 100644 --- a/scripts/create-host/manipulators.py +++ b/scripts/create-host/manipulators.py @@ -6,21 +6,18 @@ from pathlib import Path from models import HostConfig -def update_flake_nix(config: HostConfig, repo_root: Path) -> None: +def update_flake_nix(config: HostConfig, repo_root: Path, force: bool = False) -> None: """ - Add new host entry to flake.nix nixosConfigurations. + Add or update host entry in flake.nix nixosConfigurations. Args: config: Host configuration repo_root: Path to repository root + force: If True, replace existing entry; if False, insert new entry """ flake_path = repo_root / "flake.nix" content = flake_path.read_text() - # Find the closing of nixosConfigurations block - # Pattern: " };\n packages =" - pattern = r"( \};)\n( packages =)" - # Create new entry new_entry = f""" {config.hostname} = nixpkgs.lib.nixosSystem {{ inherit system; @@ -40,35 +37,47 @@ def update_flake_nix(config: HostConfig, repo_root: Path) -> None: }}; """ - # Insert new entry before closing brace - replacement = rf"\g<1>\n{new_entry}\g<2>" + # Check if hostname already exists + hostname_pattern = rf"^ {re.escape(config.hostname)} = nixpkgs\.lib\.nixosSystem" + existing_match = re.search(hostname_pattern, content, re.MULTILINE) - new_content, count = re.subn(pattern, replacement, content) + if existing_match and force: + # Replace existing entry + # Match the entire block from "hostname = " to "};" + replace_pattern = rf"^ {re.escape(config.hostname)} = nixpkgs\.lib\.nixosSystem \{{.*?^ \}};\n" + new_content, count = re.subn(replace_pattern, new_entry, content, flags=re.MULTILINE | re.DOTALL) - if count == 0: - raise ValueError( - "Could not find insertion point in flake.nix. 
" - "Looking for pattern: ' };\\n devShells ='" - ) + if count == 0: + raise ValueError(f"Could not find existing entry for {config.hostname} in flake.nix") + else: + # Insert new entry before closing brace + # Pattern: " };\n packages =" + pattern = r"( \};)\n( packages =)" + replacement = rf"\g<1>\n{new_entry}\g<2>" + + new_content, count = re.subn(pattern, replacement, content) + + if count == 0: + raise ValueError( + "Could not find insertion point in flake.nix. " + "Looking for pattern: ' };\\n packages ='" + ) flake_path.write_text(new_content) -def update_terraform_vms(config: HostConfig, repo_root: Path) -> None: +def update_terraform_vms(config: HostConfig, repo_root: Path, force: bool = False) -> None: """ - Add new VM entry to terraform/vms.tf locals.vms map. + Add or update VM entry in terraform/vms.tf locals.vms map. Args: config: Host configuration repo_root: Path to repository root + force: If True, replace existing entry; if False, insert new entry """ terraform_path = repo_root / "terraform" / "vms.tf" content = terraform_path.read_text() - # Find the closing of locals.vms block - # Pattern: " }\n\n # Compute VM configurations" - pattern = r"( \})\n\n( # Compute VM configurations)" - # Create new entry based on whether we have static IP or DHCP if config.is_static_ip: new_entry = f''' "{config.hostname}" = {{ @@ -86,15 +95,30 @@ def update_terraform_vms(config: HostConfig, repo_root: Path) -> None: }} ''' - # Insert new entry before closing brace - replacement = rf"{new_entry}\g<1>\n\n\g<2>" + # Check if hostname already exists + hostname_pattern = rf'^\s+"{re.escape(config.hostname)}" = \{{' + existing_match = re.search(hostname_pattern, content, re.MULTILINE) - new_content, count = re.subn(pattern, replacement, content) + if existing_match and force: + # Replace existing entry + # Match the entire block from "hostname" = { to } + replace_pattern = rf'^\s+"{re.escape(config.hostname)}" = \{{.*?^\s+\}}\n' + new_content, count = 
re.subn(replace_pattern, new_entry, content, flags=re.MULTILINE | re.DOTALL) - if count == 0: - raise ValueError( - "Could not find insertion point in terraform/vms.tf. " - "Looking for pattern: ' }\\n\\n # Compute VM configurations'" - ) + if count == 0: + raise ValueError(f"Could not find existing entry for {config.hostname} in terraform/vms.tf") + else: + # Insert new entry before closing brace + # Pattern: " }\n\n # Compute VM configurations" + pattern = r"( \})\n\n( # Compute VM configurations)" + replacement = rf"{new_entry}\g<1>\n\n\g<2>" + + new_content, count = re.subn(pattern, replacement, content) + + if count == 0: + raise ValueError( + "Could not find insertion point in terraform/vms.tf. " + "Looking for pattern: ' }\\n\\n # Compute VM configurations'" + ) terraform_path.write_text(new_content) diff --git a/terraform/README.md b/terraform/README.md index 2cca37f..38fffaa 100644 --- a/terraform/README.md +++ b/terraform/README.md @@ -87,6 +87,21 @@ vms = { } ``` +### Example: Test VM with Custom Git Branch + +For testing pipeline changes without polluting master: + +```hcl +vms = { + "test-vm" = { + ip = "10.69.13.100/24" + flake_branch = "test-pipeline" # Bootstrap from this branch + } +} +``` + +This VM will bootstrap from the `test-pipeline` branch instead of `master`. Production VMs should omit the `flake_branch` field. 
+ ## Configuration Options Each VM in the `vms` map supports the following fields (all optional): @@ -98,6 +113,7 @@ Each VM in the `vms` map supports the following fields (all optional): | `cpu_cores` | Number of CPU cores | `2` | | `memory` | Memory in MB | `2048` | | `disk_size` | Disk size (e.g., "20G", "100G") | `"20G"` | +| `flake_branch` | Git branch for bootstrap (for testing, omit for production) | `master` | | `target_node` | Proxmox node to deploy to | `"pve1"` | | `template_name` | Template VM to clone from | `"nixos-25.11.20260128.fa83fd8"` | | `storage` | Storage backend | `"local-zfs"` | @@ -182,9 +198,11 @@ deployment_summary = { - `main.tf` - Provider configuration - `variables.tf` - Variable definitions and defaults - `vms.tf` - VM definitions and deployment logic +- `cloud-init.tf` - Custom cloud-init configuration for branch-specific bootstrap - `outputs.tf` - Output definitions for deployed VMs - `terraform.tfvars.example` - Example credentials file - `terraform.tfvars` - Your actual credentials (gitignored) +- `.generated/` - Auto-generated cloud-init files (gitignored) - `vm.tf.old` - Archived single-VM configuration (reference) ## Notes diff --git a/terraform/cloud-init.tf b/terraform/cloud-init.tf new file mode 100644 index 0000000..2779145 --- /dev/null +++ b/terraform/cloud-init.tf @@ -0,0 +1,55 @@ +# Cloud-init configuration for branch-specific bootstrap +# +# This file manages custom cloud-init snippets for VMs that need to bootstrap +# from a specific git branch (non-master). Production VMs omit flake_branch +# and use the default master branch. 
+ +# Generate cloud-init snippets for VMs with custom branch configuration +resource "local_file" "cloud_init_branch" { + for_each = { + for name, vm in local.vm_configs : name => vm + if vm.flake_branch != null + } + + filename = "${path.module}/.generated/cloud-init-${each.key}.yml" + content = yamlencode({ + # Write NIXOS_FLAKE_BRANCH to /etc/environment + # This will be read by bootstrap.nix service via EnvironmentFile + write_files = [{ + path = "/etc/environment" + content = "NIXOS_FLAKE_BRANCH=${each.value.flake_branch}\n" + append = true + }] + }) + + file_permission = "0644" +} + +# Upload cloud-init snippets to Proxmox +# Note: This requires SSH access to the Proxmox host +# Alternative: Manually copy files or use Proxmox API if available +resource "null_resource" "upload_cloud_init" { + for_each = { + for name, vm in local.vm_configs : name => vm + if vm.flake_branch != null + } + + # Trigger re-upload when content changes + triggers = { + content_hash = local_file.cloud_init_branch[each.key].content + } + + # Upload the cloud-init file to Proxmox snippets directory + provisioner "local-exec" { + command = <<-EOT + scp -o StrictHostKeyChecking=no \ + ${local_file.cloud_init_branch[each.key].filename} \ + ${var.proxmox_host}:/var/lib/vz/snippets/cloud-init-${each.key}.yml + EOT + } + + depends_on = [local_file.cloud_init_branch] +} + +# Ensure VMs depend on cloud-init being uploaded +# This is handled implicitly by the cicustom reference in vms.tf diff --git a/terraform/variables.tf b/terraform/variables.tf index fe13cb0..c6acc26 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -21,6 +21,12 @@ variable "proxmox_tls_insecure" { default = true } +variable "proxmox_host" { + description = "Proxmox host for SSH access (used to upload cloud-init snippets)" + type = string + default = "pve1.home.2rjus.net" +} + # Default values for VM configurations # These can be overridden per-VM in vms.tf diff --git a/terraform/vms.tf b/terraform/vms.tf 
index 6570e70..eecac99 100644 --- a/terraform/vms.tf +++ b/terraform/vms.tf @@ -22,6 +22,12 @@ locals { # disk_size = "50G" # } + # Example Test VM with custom git branch (for testing pipeline changes): + # "test-vm" = { + # ip = "10.69.13.100/24" + # flake_branch = "test-pipeline" # Bootstrap from this branch instead of master + # } + # Example Minimal VM using all defaults (uncomment to deploy): # "minimal-vm" = {} # "bootstrap-verify-test" = {} @@ -44,6 +50,8 @@ locals { # Network configuration - detect DHCP vs static ip = lookup(vm, "ip", null) gateway = lookup(vm, "gateway", var.default_gateway) + # Branch configuration for bootstrap (optional, uses master if not set) + flake_branch = lookup(vm, "flake_branch", null) } } } @@ -111,6 +119,9 @@ resource "proxmox_vm_qemu" "vm" { # Network configuration - DHCP or static IP ipconfig0 = each.value.ip != null ? "ip=${each.value.ip},gw=${each.value.gateway}" : "ip=dhcp" + # Custom cloud-init disk for branch configuration (if flake_branch is set) + cicustom = each.value.flake_branch != null ? "user=${each.value.storage}:snippets/cloud-init-${each.key}.yml" : null + # Skip IPv6 since we don't use it skip_ipv6 = true From 7fe0aa0f549bd0cbba0ba01f9d139ffbf2903d0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sun, 1 Feb 2026 16:38:13 +0100 Subject: [PATCH 2/7] test: add testvm01 for pipeline testing --- flake.nix | 16 +++++++++ hosts/testvm01/configuration.nix | 61 ++++++++++++++++++++++++++++++++ hosts/testvm01/default.nix | 5 +++ terraform/vms.tf | 7 ++++ 4 files changed, 89 insertions(+) create mode 100644 hosts/testvm01/configuration.nix create mode 100644 hosts/testvm01/default.nix diff --git a/flake.nix b/flake.nix index f585f52..e3b6054 100644 --- a/flake.nix +++ b/flake.nix @@ -335,6 +335,22 @@ ]; }; }; + testvm01 = nixpkgs.lib.nixosSystem { + inherit system; + specialArgs = { + inherit inputs self sops-nix; + }; + modules = [ + ( + { config, pkgs, ... 
}: + { + nixpkgs.overlays = commonOverlays; + } + ) + ./hosts/testvm01 + sops-nix.nixosModules.sops + ]; + }; packages = forAllSystems ( { pkgs }: { diff --git a/hosts/testvm01/configuration.nix b/hosts/testvm01/configuration.nix new file mode 100644 index 0000000..f5b0fdf --- /dev/null +++ b/hosts/testvm01/configuration.nix @@ -0,0 +1,61 @@ +{ + config, + lib, + pkgs, + ... +}: + +{ + imports = [ + ../template2/hardware-configuration.nix + + ../../system + ../../common/vm + ]; + + nixpkgs.config.allowUnfree = true; + boot.loader.grub.enable = true; + boot.loader.grub.device = "/dev/vda"; + + networking.hostName = "testvm01"; + networking.domain = "home.2rjus.net"; + networking.useNetworkd = true; + networking.useDHCP = false; + services.resolved.enable = false; + networking.nameservers = [ + "10.69.13.5" + "10.69.13.6" + ]; + + systemd.network.enable = true; + systemd.network.networks."ens18" = { + matchConfig.Name = "ens18"; + address = [ + "10.69.13.101/24" + ]; + routes = [ + { Gateway = "10.69.13.1"; } + ]; + linkConfig.RequiredForOnline = "routable"; + }; + time.timeZone = "Europe/Oslo"; + + nix.settings.experimental-features = [ + "nix-command" + "flakes" + ]; + nix.settings.tarball-ttl = 0; + environment.systemPackages = with pkgs; [ + vim + wget + git + ]; + + # Open ports in the firewall. + # networking.firewall.allowedTCPPorts = [ ... ]; + # networking.firewall.allowedUDPPorts = [ ... ]; + # Or disable the firewall altogether. + networking.firewall.enable = false; + + system.stateVersion = "25.11"; # Did you read the comment? +} \ No newline at end of file diff --git a/hosts/testvm01/default.nix b/hosts/testvm01/default.nix new file mode 100644 index 0000000..57ed4b4 --- /dev/null +++ b/hosts/testvm01/default.nix @@ -0,0 +1,5 @@ +{ ... 
}: { + imports = [ + ./configuration.nix + ]; +} \ No newline at end of file diff --git a/terraform/vms.tf b/terraform/vms.tf index eecac99..604377f 100644 --- a/terraform/vms.tf +++ b/terraform/vms.tf @@ -31,6 +31,13 @@ locals { # Example Minimal VM using all defaults (uncomment to deploy): # "minimal-vm" = {} # "bootstrap-verify-test" = {} + "testvm01" = { + ip = "10.69.13.101/24" + cpu_cores = 2 + memory = 2048 + disk_size = "20G" + flake_branch = "pipeline-testing-improvements" + } } # Compute VM configurations with defaults applied From 21a32e0521e617437cf43ed8be2f695ea2cb6b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sun, 1 Feb 2026 16:51:27 +0100 Subject: [PATCH 3/7] terraform: refactor cloud-init to use proxmox_cloud_init_disk resource Replace SSH upload approach with native proxmox_cloud_init_disk resource for cleaner, more maintainable cloud-init management. Changes: - Use proxmox_cloud_init_disk for all VMs (not just branch-specific ones) - Include SSH keys, network config, and metadata in cloud-init disk - Conditionally include NIXOS_FLAKE_BRANCH for VMs with flake_branch set - Replace ide2 cloudinit disk with cdrom reference to cloud-init disk - Remove built-in cloud-init parameters (ciuser, sshkeys, etc.) 
- Remove cicustom parameter (no longer needed) - Remove proxmox_host variable (no SSH uploads required) - Remove .gitignore entry for .generated/ directory Benefits: - No SSH access to Proxmox required - All cloud-init config managed in Terraform - Consistent approach for all VMs - Cleaner state management Co-Authored-By: Claude Sonnet 4.5 --- .gitignore | 1 - terraform/cloud-init.tf | 94 ++++++++++++++++++++--------------------- terraform/variables.tf | 6 --- terraform/vms.tf | 25 +++-------- 4 files changed, 52 insertions(+), 74 deletions(-) diff --git a/.gitignore b/.gitignore index 11ce9bc..8068363 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,3 @@ terraform/terraform.tfvars terraform/*.auto.tfvars terraform/crash.log terraform/crash.*.log -terraform/.generated/ diff --git a/terraform/cloud-init.tf b/terraform/cloud-init.tf index 2779145..837a2fb 100644 --- a/terraform/cloud-init.tf +++ b/terraform/cloud-init.tf @@ -1,55 +1,51 @@ -# Cloud-init configuration for branch-specific bootstrap +# Cloud-init configuration for all VMs # -# This file manages custom cloud-init snippets for VMs that need to bootstrap -# from a specific git branch (non-master). Production VMs omit flake_branch -# and use the default master branch. +# This file manages cloud-init disks for all VMs using the proxmox_cloud_init_disk resource. +# VMs with flake_branch set will include NIXOS_FLAKE_BRANCH environment variable. 
-# Generate cloud-init snippets for VMs with custom branch configuration -resource "local_file" "cloud_init_branch" { - for_each = { - for name, vm in local.vm_configs : name => vm - if vm.flake_branch != null - } +resource "proxmox_cloud_init_disk" "ci" { + for_each = local.vm_configs - filename = "${path.module}/.generated/cloud-init-${each.key}.yml" - content = yamlencode({ - # Write NIXOS_FLAKE_BRANCH to /etc/environment - # This will be read by bootstrap.nix service via EnvironmentFile - write_files = [{ - path = "/etc/environment" - content = "NIXOS_FLAKE_BRANCH=${each.value.flake_branch}\n" - append = true + name = each.key + pve_node = each.value.target_node + storage = each.value.storage + + # User data includes SSH keys and optionally NIXOS_FLAKE_BRANCH + user_data = <<-EOT + #cloud-config + ssh_authorized_keys: + - ${each.value.ssh_public_key} + ${each.value.flake_branch != null ? <<-BRANCH + write_files: + - path: /etc/environment + content: | + NIXOS_FLAKE_BRANCH=${each.value.flake_branch} + append: true + BRANCH +: ""} + EOT + +# Network configuration - static IP or DHCP +network_config = yamlencode({ + version = 1 + config = [{ + type = "physical" + name = "ens18" + subnets = each.value.ip != null ? 
[{ + type = "static" + address = each.value.ip + gateway = each.value.gateway + dns_nameservers = split(" ", each.value.nameservers) + dns_search = [each.value.search_domain] + }] : [{ + type = "dhcp" }] - }) + }] +}) - file_permission = "0644" +# Instance metadata +meta_data = yamlencode({ + instance_id = sha1(each.key) + local-hostname = each.key +}) } - -# Upload cloud-init snippets to Proxmox -# Note: This requires SSH access to the Proxmox host -# Alternative: Manually copy files or use Proxmox API if available -resource "null_resource" "upload_cloud_init" { - for_each = { - for name, vm in local.vm_configs : name => vm - if vm.flake_branch != null - } - - # Trigger re-upload when content changes - triggers = { - content_hash = local_file.cloud_init_branch[each.key].content - } - - # Upload the cloud-init file to Proxmox snippets directory - provisioner "local-exec" { - command = <<-EOT - scp -o StrictHostKeyChecking=no \ - ${local_file.cloud_init_branch[each.key].filename} \ - ${var.proxmox_host}:/var/lib/vz/snippets/cloud-init-${each.key}.yml - EOT - } - - depends_on = [local_file.cloud_init_branch] -} - -# Ensure VMs depend on cloud-init being uploaded -# This is handled implicitly by the cicustom reference in vms.tf diff --git a/terraform/variables.tf b/terraform/variables.tf index c6acc26..fe13cb0 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -21,12 +21,6 @@ variable "proxmox_tls_insecure" { default = true } -variable "proxmox_host" { - description = "Proxmox host for SSH access (used to upload cloud-init snippets)" - type = string - default = "pve1.home.2rjus.net" -} - # Default values for VM configurations # These can be overridden per-VM in vms.tf diff --git a/terraform/vms.tf b/terraform/vms.tf index 604377f..340b2cb 100644 --- a/terraform/vms.tf +++ b/terraform/vms.tf @@ -32,10 +32,10 @@ locals { # "minimal-vm" = {} # "bootstrap-verify-test" = {} "testvm01" = { - ip = "10.69.13.101/24" - cpu_cores = 2 - memory = 2048 - disk_size 
= "20G" + ip = "10.69.13.101/24" + cpu_cores = 2 + memory = 2048 + disk_size = "20G" flake_branch = "pipeline-testing-improvements" } } @@ -104,8 +104,9 @@ resource "proxmox_vm_qemu" "vm" { } ide { ide2 { - cloudinit { - storage = each.value.storage + # Reference the custom cloud-init disk created in cloud-init.tf + cdrom { + iso = proxmox_cloud_init_disk.ci[each.key].id } } } @@ -117,18 +118,6 @@ resource "proxmox_vm_qemu" "vm" { # Agent agent = 1 - # Cloud-init configuration - ciuser = "root" - sshkeys = each.value.ssh_public_key - nameserver = each.value.nameservers - searchdomain = each.value.search_domain - - # Network configuration - DHCP or static IP - ipconfig0 = each.value.ip != null ? "ip=${each.value.ip},gw=${each.value.gateway}" : "ip=dhcp" - - # Custom cloud-init disk for branch configuration (if flake_branch is set) - cicustom = each.value.flake_branch != null ? "user=${each.value.storage}:snippets/cloud-init-${each.key}.yml" : null - # Skip IPv6 since we don't use it skip_ipv6 = true From 1f1829dc2f7e5b829da1cb3bcff7125e76f98e11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sun, 1 Feb 2026 16:51:41 +0100 Subject: [PATCH 4/7] docs: update terraform README for cloud-init refactoring Remove mention of .generated/ directory and clarify that cloud-init.tf manages all cloud-init disks, not just branch-specific ones. 
Co-Authored-By: Claude Sonnet 4.5 --- terraform/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/terraform/README.md b/terraform/README.md index 38fffaa..d695c54 100644 --- a/terraform/README.md +++ b/terraform/README.md @@ -198,11 +198,10 @@ deployment_summary = { - `main.tf` - Provider configuration - `variables.tf` - Variable definitions and defaults - `vms.tf` - VM definitions and deployment logic -- `cloud-init.tf` - Custom cloud-init configuration for branch-specific bootstrap +- `cloud-init.tf` - Cloud-init disk management (SSH keys, networking, branch config) - `outputs.tf` - Output definitions for deployed VMs - `terraform.tfvars.example` - Example credentials file - `terraform.tfvars` - Your actual credentials (gitignored) -- `.generated/` - Auto-generated cloud-init files (gitignored) - `vm.tf.old` - Archived single-VM configuration (reference) ## Notes From fca50562c3290b170ad5e0efbc1396623020dfcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sun, 1 Feb 2026 16:54:33 +0100 Subject: [PATCH 5/7] terraform: fix cloud-init conditional type inconsistency Fix OpenTofu error where static IP and DHCP branches had different object structures in the subnets array. Move conditional to network_config level so both branches return complete, consistent yamlencode() results. Error was: "The true and false result expressions must have consistent types" Solution: Make network_config itself conditional rather than the subnets array, ensuring both branches return the same type (string from yamlencode). 
Co-Authored-By: Claude Sonnet 4.5 --- terraform/cloud-init.tf | 49 +++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/terraform/cloud-init.tf b/terraform/cloud-init.tf index 837a2fb..de61961 100644 --- a/terraform/cloud-init.tf +++ b/terraform/cloud-init.tf @@ -25,27 +25,34 @@ resource "proxmox_cloud_init_disk" "ci" { : ""} EOT -# Network configuration - static IP or DHCP -network_config = yamlencode({ - version = 1 - config = [{ - type = "physical" - name = "ens18" - subnets = each.value.ip != null ? [{ - type = "static" - address = each.value.ip - gateway = each.value.gateway - dns_nameservers = split(" ", each.value.nameservers) - dns_search = [each.value.search_domain] - }] : [{ - type = "dhcp" + # Network configuration - static IP or DHCP + network_config = each.value.ip != null ? yamlencode({ + version = 1 + config = [{ + type = "physical" + name = "ens18" + subnets = [{ + type = "static" + address = each.value.ip + gateway = each.value.gateway + dns_nameservers = split(" ", each.value.nameservers) + dns_search = [each.value.search_domain] + }] }] - }] -}) + }) : yamlencode({ + version = 1 + config = [{ + type = "physical" + name = "ens18" + subnets = [{ + type = "dhcp" + }] + }] + }) -# Instance metadata -meta_data = yamlencode({ - instance_id = sha1(each.key) - local-hostname = each.key -}) + # Instance metadata + meta_data = yamlencode({ + instance_id = sha1(each.key) + local-hostname = each.key + }) } From cec496dda7daf695cc54fd90d819fa62738793fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sun, 1 Feb 2026 16:59:09 +0100 Subject: [PATCH 6/7] terraform: use local storage for cloud-init disks Fix error "500 can't upload to storage type 'zfspool'" by using "local" storage pool for cloud-init disks instead of the VM's storage pool. Cloud-init disks require storage that supports ISO/snippet content types, which zfspool does not. 
The "local" storage pool (directory-based) supports this content type. Co-Authored-By: Claude Sonnet 4.5 --- terraform/cloud-init.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/cloud-init.tf b/terraform/cloud-init.tf index de61961..5ca7fe7 100644 --- a/terraform/cloud-init.tf +++ b/terraform/cloud-init.tf @@ -8,7 +8,7 @@ resource "proxmox_cloud_init_disk" "ci" { name = each.key pve_node = each.value.target_node - storage = each.value.storage + storage = "local" # Cloud-init disks must be on storage that supports ISO/snippets # User data includes SSH keys and optionally NIXOS_FLAKE_BRANCH user_data = <<-EOT From 99082860628e33f06787105f883cc307f4d59b61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sun, 1 Feb 2026 17:08:05 +0100 Subject: [PATCH 7/7] scripts: fix create-host flake.nix insertion point Fix bug where new hosts were added outside of nixosConfigurations block instead of inside it. Issues fixed: 1. Pattern was looking for "packages =" but actual text is "packages = forAllSystems" 2. Replacement was putting new entry AFTER closing brace instead of BEFORE 3. 
testvm01 was at top-level flake output instead of in nixosConfigurations Changes: - Update pattern to match "packages = forAllSystems" - Put new entry BEFORE the closing brace of nixosConfigurations - Move testvm01 to correct location inside nixosConfigurations block Result: nix flake show now correctly shows testvm01 as NixOS configuration Co-Authored-By: Claude Sonnet 4.5 --- flake.nix | 2 +- scripts/create-host/manipulators.py | 10 +++++----- scripts/create-host/templates/configuration.nix.j2 | 5 ++--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/flake.nix b/flake.nix index e3b6054..05d7a18 100644 --- a/flake.nix +++ b/flake.nix @@ -334,7 +334,6 @@ sops-nix.nixosModules.sops ]; }; - }; testvm01 = nixpkgs.lib.nixosSystem { inherit system; specialArgs = { @@ -351,6 +350,7 @@ sops-nix.nixosModules.sops ]; }; + }; packages = forAllSystems ( { pkgs }: { diff --git a/scripts/create-host/manipulators.py b/scripts/create-host/manipulators.py index 4d821cc..366e215 100644 --- a/scripts/create-host/manipulators.py +++ b/scripts/create-host/manipulators.py @@ -50,17 +50,17 @@ def update_flake_nix(config: HostConfig, repo_root: Path, force: bool = False) - if count == 0: raise ValueError(f"Could not find existing entry for {config.hostname} in flake.nix") else: - # Insert new entry before closing brace - # Pattern: " };\n packages =" - pattern = r"( \};)\n( packages =)" - replacement = rf"\g<1>\n{new_entry}\g<2>" + # Insert new entry before closing brace of nixosConfigurations + # Pattern: " };\n packages = forAllSystems" + pattern = r"( \};)\n( packages = forAllSystems)" + replacement = rf"{new_entry}\g<1>\n\g<2>" new_content, count = re.subn(pattern, replacement, content) if count == 0: raise ValueError( "Could not find insertion point in flake.nix. 
" - "Looking for pattern: ' };\\n packages ='" + "Looking for pattern: ' };\\n packages = forAllSystems'" ) flake_path.write_text(new_content) diff --git a/scripts/create-host/templates/configuration.nix.j2 b/scripts/create-host/templates/configuration.nix.j2 index 1665e67..30e830f 100644 --- a/scripts/create-host/templates/configuration.nix.j2 +++ b/scripts/create-host/templates/configuration.nix.j2 @@ -7,16 +7,15 @@ { imports = [ - ../template/hardware-configuration.nix + ../template2/hardware-configuration.nix ../../system ../../common/vm ]; nixpkgs.config.allowUnfree = true; - # Use the systemd-boot EFI boot loader. boot.loader.grub.enable = true; - boot.loader.grub.device = "/dev/sda"; + boot.loader.grub.device = "/dev/vda"; networking.hostName = "{{ hostname }}"; networking.domain = "{{ domain }}";