template2: send bootstrap status to Loki for remote monitoring

Adds log_to_loki function that pushes structured log entries to Loki
at key bootstrap stages (starting, network_ok, vault_*, building,
success, failed). Enables querying bootstrap state via LogQL without
console access.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-07 15:34:47 +01:00
parent 11261c4636
commit ae3039af19

View File

@@ -6,6 +6,43 @@ let
text = '' text = ''
set -euo pipefail set -euo pipefail
LOKI_URL="http://monitoring01.home.2rjus.net:3100/loki/api/v1/push"
# Send a log entry to Loki with bootstrap status
# Usage: log_to_loki <stage> <message>
#   stage   - short status keyword, sent as the "stage" stream label
#   message - free-form log line
# Reads LOKI_URL, HOSTNAME and BRANCH (BRANCH falls back to "master").
# Best-effort: a bad call, a jq/date failure or an unreachable Loki is
# ignored, so logging never aborts the bootstrap even though the script
# runs under "set -euo pipefail".
log_to_loki() {
  # Too few arguments: skip instead of tripping "set -u" on $1/$2.
  if [ "$#" -lt 2 ]; then
    return 0
  fi
  local stage="$1"
  local message="$2"

  # Loki wants a Unix timestamp in nanoseconds. %N adds sub-second
  # precision so stages logged within the same second get distinct
  # timestamps. If this date implementation does not support %N (the
  # output is not purely numeric), fall back to whole seconds.
  local timestamp_ns
  timestamp_ns="$(date +%s%N)" || return 0
  case "$timestamp_ns" in
    "" | *[!0-9]*)
      timestamp_ns="$(date +%s)000000000" || return 0
      ;;
  esac

  # Build the push payload with jq so every value is JSON-escaped.
  # "|| return 0": without it a jq failure would terminate the whole
  # bootstrap because of "set -e".
  local payload
  payload=$(jq -n \
    --arg host "$HOSTNAME" \
    --arg stage "$stage" \
    --arg branch "''${BRANCH:-master}" \
    --arg ts "$timestamp_ns" \
    --arg msg "$message" \
    '{
      streams: [{
        stream: {
          job: "bootstrap",
          host: $host,
          stage: $stage,
          branch: $branch
        },
        values: [[$ts, $msg]]
      }]
    }') || return 0

  # Short timeouts keep an unreachable Loki from stalling the bootstrap;
  # output and exit status are deliberately discarded.
  curl -s --connect-timeout 2 --max-time 5 \
    -X POST \
    -H "Content-Type: application/json" \
    -d "$payload" \
    "$LOKI_URL" >/dev/null 2>&1 || true
}
echo "================================================================================" echo "================================================================================"
echo " NIXOS BOOTSTRAP IN PROGRESS" echo " NIXOS BOOTSTRAP IN PROGRESS"
echo "================================================================================" echo "================================================================================"
@@ -14,19 +51,27 @@ let
# Read hostname set by cloud-init (from Terraform VM name via user-data) # Read hostname set by cloud-init (from Terraform VM name via user-data)
# Cloud-init sets the system hostname from user-data.txt, so we read it from hostnamectl # Cloud-init sets the system hostname from user-data.txt, so we read it from hostnamectl
HOSTNAME=$(hostnamectl hostname) HOSTNAME=$(hostnamectl hostname)
# Read git branch from environment, default to master
BRANCH="''${NIXOS_FLAKE_BRANCH:-master}"
echo "Hostname: $HOSTNAME" echo "Hostname: $HOSTNAME"
echo "" echo ""
echo "Starting NixOS bootstrap for host: $HOSTNAME" echo "Starting NixOS bootstrap for host: $HOSTNAME"
log_to_loki "starting" "Bootstrap starting for $HOSTNAME (branch: $BRANCH)"
echo "Waiting for network connectivity..." echo "Waiting for network connectivity..."
# Verify we can reach the git server via HTTPS (doesn't respond to ping) # Verify we can reach the git server via HTTPS (doesn't respond to ping)
if ! curl -s --connect-timeout 5 --max-time 10 https://git.t-juice.club >/dev/null 2>&1; then if ! curl -s --connect-timeout 5 --max-time 10 https://git.t-juice.club >/dev/null 2>&1; then
echo "ERROR: Cannot reach git.t-juice.club via HTTPS" echo "ERROR: Cannot reach git.t-juice.club via HTTPS"
echo "Check network configuration and DNS settings" echo "Check network configuration and DNS settings"
log_to_loki "failed" "Network check failed - cannot reach git.t-juice.club"
exit 1 exit 1
fi fi
echo "Network connectivity confirmed" echo "Network connectivity confirmed"
log_to_loki "network_ok" "Network connectivity confirmed"
# Unwrap Vault token and store AppRole credentials (if provided) # Unwrap Vault token and store AppRole credentials (if provided)
if [ -n "''${VAULT_WRAPPED_TOKEN:-}" ]; then if [ -n "''${VAULT_WRAPPED_TOKEN:-}" ]; then
@@ -55,6 +100,7 @@ let
chmod 600 /var/lib/vault/approle/secret-id chmod 600 /var/lib/vault/approle/secret-id
echo "Vault credentials unwrapped and stored successfully" echo "Vault credentials unwrapped and stored successfully"
log_to_loki "vault_ok" "Vault credentials unwrapped and stored"
else else
echo "WARNING: Failed to unwrap Vault token" echo "WARNING: Failed to unwrap Vault token"
if [ -n "$UNWRAP_RESPONSE" ]; then if [ -n "$UNWRAP_RESPONSE" ]; then
@@ -68,17 +114,17 @@ let
echo "To regenerate token, run: create-host --hostname $HOSTNAME --force" echo "To regenerate token, run: create-host --hostname $HOSTNAME --force"
echo "" echo ""
echo "Vault secrets will not be available, but continuing bootstrap..." echo "Vault secrets will not be available, but continuing bootstrap..."
log_to_loki "vault_warn" "Failed to unwrap Vault token - continuing without secrets"
fi fi
else else
echo "No Vault wrapped token provided (VAULT_WRAPPED_TOKEN not set)" echo "No Vault wrapped token provided (VAULT_WRAPPED_TOKEN not set)"
echo "Skipping Vault credential setup" echo "Skipping Vault credential setup"
log_to_loki "vault_skip" "No Vault token provided - skipping credential setup"
fi fi
echo "Fetching and building NixOS configuration from flake..." echo "Fetching and building NixOS configuration from flake..."
# Read git branch from environment, default to master
BRANCH="''${NIXOS_FLAKE_BRANCH:-master}"
echo "Using git branch: $BRANCH" echo "Using git branch: $BRANCH"
log_to_loki "building" "Starting nixos-rebuild boot"
# Build and activate the host-specific configuration # Build and activate the host-specific configuration
FLAKE_URL="git+https://git.t-juice.club/torjus/nixos-servers.git?ref=$BRANCH#''${HOSTNAME}" FLAKE_URL="git+https://git.t-juice.club/torjus/nixos-servers.git?ref=$BRANCH#''${HOSTNAME}"
@@ -86,12 +132,14 @@ let
if nixos-rebuild boot --flake "$FLAKE_URL"; then if nixos-rebuild boot --flake "$FLAKE_URL"; then
echo "Successfully built configuration for $HOSTNAME" echo "Successfully built configuration for $HOSTNAME"
echo "Rebooting into new configuration..." echo "Rebooting into new configuration..."
log_to_loki "success" "Build successful - rebooting into new configuration"
sleep 2 sleep 2
systemctl reboot systemctl reboot
else else
echo "ERROR: nixos-rebuild failed for $HOSTNAME" echo "ERROR: nixos-rebuild failed for $HOSTNAME"
echo "Check that flake has configuration for this hostname" echo "Check that flake has configuration for this hostname"
echo "Manual intervention required - system will not reboot" echo "Manual intervention required - system will not reboot"
log_to_loki "failed" "nixos-rebuild failed - manual intervention required"
exit 1 exit 1
fi fi
''; '';