From ae3039af195c9b65ea893a4880c2a47e4d46183e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sat, 7 Feb 2026 15:34:47 +0100 Subject: [PATCH] template2: send bootstrap status to Loki for remote monitoring Adds log_to_loki function that pushes structured log entries to Loki at key bootstrap stages (starting, network_ok, vault_*, building, success, failed). Enables querying bootstrap state via LogQL without console access. Co-Authored-By: Claude Opus 4.5 --- hosts/template2/bootstrap.nix | 54 +++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/hosts/template2/bootstrap.nix b/hosts/template2/bootstrap.nix index 7db16ca..ea2d107 100644 --- a/hosts/template2/bootstrap.nix +++ b/hosts/template2/bootstrap.nix @@ -6,6 +6,43 @@ let text = '' set -euo pipefail + LOKI_URL="http://monitoring01.home.2rjus.net:3100/loki/api/v1/push" + + # Send a log entry to Loki with bootstrap status + # Usage: log_to_loki <stage> <message> + # Fails silently if Loki is unreachable or the payload cannot be built + log_to_loki() { + local stage="$1" + local message="$2" + local timestamp_ns + timestamp_ns="$(date +%s)000000000" + + local payload + payload=$(jq -n \ + --arg host "$HOSTNAME" \ + --arg stage "$stage" \ + --arg branch "''${BRANCH:-master}" \ + --arg ts "$timestamp_ns" \ + --arg msg "$message" \ + '{ + streams: [{ + stream: { + job: "bootstrap", + host: $host, + stage: $stage, + branch: $branch + }, + values: [[$ts, $msg]] + }] + }') || return 0 + + curl -s --connect-timeout 2 --max-time 5 \ + -X POST \ + -H "Content-Type: application/json" \ + -d "$payload" \ + "$LOKI_URL" >/dev/null 2>&1 || true + } + echo "================================================================================" echo " NIXOS BOOTSTRAP IN PROGRESS" echo "================================================================================" @@ -14,19 +51,27 @@ let # Read hostname set by cloud-init (from Terraform VM name via user-data) # Cloud-init sets the system hostname from user-data.txt, so we read 
it from hostnamectl HOSTNAME=$(hostnamectl hostname) + # Read git branch from environment, default to master + BRANCH="''${NIXOS_FLAKE_BRANCH:-master}" + echo "Hostname: $HOSTNAME" echo "" echo "Starting NixOS bootstrap for host: $HOSTNAME" + + log_to_loki "starting" "Bootstrap starting for $HOSTNAME (branch: $BRANCH)" + echo "Waiting for network connectivity..." # Verify we can reach the git server via HTTPS (doesn't respond to ping) if ! curl -s --connect-timeout 5 --max-time 10 https://git.t-juice.club >/dev/null 2>&1; then echo "ERROR: Cannot reach git.t-juice.club via HTTPS" echo "Check network configuration and DNS settings" + log_to_loki "failed" "Network check failed - cannot reach git.t-juice.club" exit 1 fi echo "Network connectivity confirmed" + log_to_loki "network_ok" "Network connectivity confirmed" # Unwrap Vault token and store AppRole credentials (if provided) if [ -n "''${VAULT_WRAPPED_TOKEN:-}" ]; then @@ -55,6 +100,7 @@ let chmod 600 /var/lib/vault/approle/secret-id echo "Vault credentials unwrapped and stored successfully" + log_to_loki "vault_ok" "Vault credentials unwrapped and stored" else echo "WARNING: Failed to unwrap Vault token" if [ -n "$UNWRAP_RESPONSE" ]; then @@ -68,17 +114,17 @@ let echo "To regenerate token, run: create-host --hostname $HOSTNAME --force" echo "" echo "Vault secrets will not be available, but continuing bootstrap..." + log_to_loki "vault_warn" "Failed to unwrap Vault token - continuing without secrets" fi else echo "No Vault wrapped token provided (VAULT_WRAPPED_TOKEN not set)" echo "Skipping Vault credential setup" + log_to_loki "vault_skip" "No Vault token provided - skipping credential setup" fi echo "Fetching and building NixOS configuration from flake..." 
- - # Read git branch from environment, default to master - BRANCH="''${NIXOS_FLAKE_BRANCH:-master}" echo "Using git branch: $BRANCH" + log_to_loki "building" "Starting nixos-rebuild boot" # Build and activate the host-specific configuration FLAKE_URL="git+https://git.t-juice.club/torjus/nixos-servers.git?ref=$BRANCH#''${HOSTNAME}" @@ -86,12 +132,14 @@ let if nixos-rebuild boot --flake "$FLAKE_URL"; then echo "Successfully built configuration for $HOSTNAME" echo "Rebooting into new configuration..." + log_to_loki "success" "Build successful - rebooting into new configuration" sleep 2 systemctl reboot else echo "ERROR: nixos-rebuild failed for $HOSTNAME" echo "Check that flake has configuration for this hostname" echo "Manual intervention required - system will not reboot" + log_to_loki "failed" "nixos-rebuild failed - manual intervention required" exit 1 fi '';