template2: send bootstrap status to Loki for remote monitoring
Adds a log_to_loki function that pushes structured log entries to Loki at key bootstrap stages (starting, network_ok, vault_ok, vault_warn, vault_skip, building, success, failed). This enables querying bootstrap state via LogQL without console access.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,43 @@ let
|
||||
text = ''
|
||||
set -euo pipefail
|
||||
|
||||
LOKI_URL="http://monitoring01.home.2rjus.net:3100/loki/api/v1/push"
|
||||
|
||||
# Send a log entry to Loki with bootstrap status.
# Usage: log_to_loki <stage> <message>
#
# Globals:   LOKI_URL (read), HOSTNAME (read), BRANCH (read, "master" if unset)
# Arguments: $1 - stage, sent as the "stage" stream label ("unknown" if omitted)
#            $2 - message, sent as the log line (empty if omitted)
# Returns:   always 0
#
# Strictly best-effort: the surrounding script runs under `set -euo pipefail`,
# so every step that can fail here (date, jq, curl, a missing argument) is
# guarded. A monitoring problem must never abort the bootstrap itself.
log_to_loki() {
  local stage="''${1:-unknown}"
  local message="''${2:-}"

  # Loki takes the timestamp as a string of nanoseconds since the epoch;
  # date is asked for whole seconds, so nine zeros are appended.
  local timestamp_ns
  timestamp_ns="$(date +%s)000000000" || return 0

  # Build the push body with jq so that arbitrary message text is JSON-escaped.
  # If jq is missing or fails, give up on this log entry instead of letting
  # errexit terminate the caller.
  local payload
  payload=$(jq -n \
    --arg host "$HOSTNAME" \
    --arg stage "$stage" \
    --arg branch "''${BRANCH:-master}" \
    --arg ts "$timestamp_ns" \
    --arg msg "$message" \
    '{
      streams: [{
        stream: {
          job: "bootstrap",
          host: $host,
          stage: $stage,
          branch: $branch
        },
        values: [[$ts, $msg]]
      }]
    }') || return 0

  # Short timeouts keep an unreachable Loki from stalling the bootstrap;
  # output and exit status are discarded on purpose.
  curl -s --connect-timeout 2 --max-time 5 \
    -X POST \
    -H "Content-Type: application/json" \
    -d "$payload" \
    "$LOKI_URL" >/dev/null 2>&1 || true

  return 0
}
|
||||
|
||||
echo "================================================================================"
|
||||
echo " NIXOS BOOTSTRAP IN PROGRESS"
|
||||
echo "================================================================================"
|
||||
@@ -14,19 +51,27 @@ let
|
||||
# Read hostname set by cloud-init (from Terraform VM name via user-data)
|
||||
# Cloud-init sets the system hostname from user-data.txt, so we read it from hostnamectl
|
||||
HOSTNAME=$(hostnamectl hostname)
|
||||
# Read git branch from environment, default to master
|
||||
BRANCH="''${NIXOS_FLAKE_BRANCH:-master}"
|
||||
|
||||
echo "Hostname: $HOSTNAME"
|
||||
echo ""
|
||||
echo "Starting NixOS bootstrap for host: $HOSTNAME"
|
||||
|
||||
log_to_loki "starting" "Bootstrap starting for $HOSTNAME (branch: $BRANCH)"
|
||||
|
||||
echo "Waiting for network connectivity..."
|
||||
|
||||
# Verify we can reach the git server via HTTPS (doesn't respond to ping)
# This is a single attempt bounded by the curl timeouts below; there is no
# retry loop here. The response body and any curl error text are discarded,
# only the exit status is used.
|
||||
if ! curl -s --connect-timeout 5 --max-time 10 https://git.t-juice.club >/dev/null 2>&1; then
|
||||
echo "ERROR: Cannot reach git.t-juice.club via HTTPS"
|
||||
echo "Check network configuration and DNS settings"
|
||||
# Report the failure before exiting. log_to_loki ignores its own curl
# failure, so this call cannot block the exit below.
# NOTE(review): with the network down this push will presumably not arrive.
|
||||
log_to_loki "failed" "Network check failed - cannot reach git.t-juice.club"
|
||||
# Abort the bootstrap with a non-zero status.
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Network connectivity confirmed"
|
||||
log_to_loki "network_ok" "Network connectivity confirmed"
|
||||
|
||||
# Unwrap Vault token and store AppRole credentials (if provided)
|
||||
if [ -n "''${VAULT_WRAPPED_TOKEN:-}" ]; then
|
||||
@@ -55,6 +100,7 @@ let
|
||||
chmod 600 /var/lib/vault/approle/secret-id
|
||||
|
||||
echo "Vault credentials unwrapped and stored successfully"
|
||||
log_to_loki "vault_ok" "Vault credentials unwrapped and stored"
|
||||
else
|
||||
echo "WARNING: Failed to unwrap Vault token"
|
||||
if [ -n "$UNWRAP_RESPONSE" ]; then
|
||||
@@ -68,17 +114,17 @@ let
|
||||
echo "To regenerate token, run: create-host --hostname $HOSTNAME --force"
|
||||
echo ""
|
||||
echo "Vault secrets will not be available, but continuing bootstrap..."
|
||||
log_to_loki "vault_warn" "Failed to unwrap Vault token - continuing without secrets"
|
||||
fi
|
||||
else
|
||||
echo "No Vault wrapped token provided (VAULT_WRAPPED_TOKEN not set)"
|
||||
echo "Skipping Vault credential setup"
|
||||
log_to_loki "vault_skip" "No Vault token provided - skipping credential setup"
|
||||
fi
|
||||
|
||||
echo "Fetching and building NixOS configuration from flake..."
|
||||
|
||||
# Re-read the git branch from the environment (default: master); BRANCH was already assigned from the same variable before the first log_to_loki call
|
||||
BRANCH="''${NIXOS_FLAKE_BRANCH:-master}"
|
||||
echo "Using git branch: $BRANCH"
|
||||
log_to_loki "building" "Starting nixos-rebuild boot"
|
||||
|
||||
# Build and activate the host-specific configuration
|
||||
FLAKE_URL="git+https://git.t-juice.club/torjus/nixos-servers.git?ref=$BRANCH#''${HOSTNAME}"
|
||||
@@ -86,12 +132,14 @@ let
|
||||
# Run the rebuild as the `if` condition so that a failure reaches the else
# branch below instead of being handled by `set -e`.
if nixos-rebuild boot --flake "$FLAKE_URL"; then
|
||||
echo "Successfully built configuration for $HOSTNAME"
|
||||
echo "Rebooting into new configuration..."
|
||||
# Push the final status before the reboot is requested.
|
||||
log_to_loki "success" "Build successful - rebooting into new configuration"
|
||||
# NOTE(review): the reason for the 2 second pause is not documented here;
# presumably it lets console output settle before reboot. Confirm.
|
||||
sleep 2
|
||||
systemctl reboot
|
||||
else
|
||||
echo "ERROR: nixos-rebuild failed for $HOSTNAME"
|
||||
echo "Check that flake has configuration for this hostname"
|
||||
echo "Manual intervention required - system will not reboot"
|
||||
# Record the failure remotely, then exit non-zero without rebooting.
|
||||
log_to_loki "failed" "nixos-rebuild failed - manual intervention required"
|
||||
exit 1
|
||||
fi
|
||||
'';
|
||||
|
||||
Reference in New Issue
Block a user