template2: send bootstrap status to Loki for remote monitoring
Adds log_to_loki function that pushes structured log entries to Loki at key bootstrap stages (starting, network_ok, vault_*, building, success, failed). Enables querying bootstrap state via LogQL without console access. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,43 @@ let
|
|||||||
text = ''
|
text = ''
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
LOKI_URL="http://monitoring01.home.2rjus.net:3100/loki/api/v1/push"
|
||||||
|
|
||||||
|
# Send a log entry to Loki with bootstrap status
|
||||||
|
# Usage: log_to_loki <stage> <message>
|
||||||
|
# Fails silently if Loki is unreachable
|
||||||
|
log_to_loki() {
|
||||||
|
local stage="$1"
|
||||||
|
local message="$2"
|
||||||
|
local timestamp_ns
|
||||||
|
timestamp_ns="$(date +%s)000000000"
|
||||||
|
|
||||||
|
local payload
|
||||||
|
payload=$(jq -n \
|
||||||
|
--arg host "$HOSTNAME" \
|
||||||
|
--arg stage "$stage" \
|
||||||
|
--arg branch "''${BRANCH:-master}" \
|
||||||
|
--arg ts "$timestamp_ns" \
|
||||||
|
--arg msg "$message" \
|
||||||
|
'{
|
||||||
|
streams: [{
|
||||||
|
stream: {
|
||||||
|
job: "bootstrap",
|
||||||
|
host: $host,
|
||||||
|
stage: $stage,
|
||||||
|
branch: $branch
|
||||||
|
},
|
||||||
|
values: [[$ts, $msg]]
|
||||||
|
}]
|
||||||
|
}')
|
||||||
|
|
||||||
|
curl -s --connect-timeout 2 --max-time 5 \
|
||||||
|
-X POST \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "$payload" \
|
||||||
|
"$LOKI_URL" >/dev/null 2>&1 || true
|
||||||
|
}
|
||||||
|
|
||||||
echo "================================================================================"
|
echo "================================================================================"
|
||||||
echo " NIXOS BOOTSTRAP IN PROGRESS"
|
echo " NIXOS BOOTSTRAP IN PROGRESS"
|
||||||
echo "================================================================================"
|
echo "================================================================================"
|
||||||
@@ -14,19 +51,27 @@ let
|
|||||||
# Read hostname set by cloud-init (from Terraform VM name via user-data)
|
# Read hostname set by cloud-init (from Terraform VM name via user-data)
|
||||||
# Cloud-init sets the system hostname from user-data.txt, so we read it from hostnamectl
|
# Cloud-init sets the system hostname from user-data.txt, so we read it from hostnamectl
|
||||||
HOSTNAME=$(hostnamectl hostname)
|
HOSTNAME=$(hostnamectl hostname)
|
||||||
|
# Read git branch from environment, default to master
|
||||||
|
BRANCH="''${NIXOS_FLAKE_BRANCH:-master}"
|
||||||
|
|
||||||
echo "Hostname: $HOSTNAME"
|
echo "Hostname: $HOSTNAME"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Starting NixOS bootstrap for host: $HOSTNAME"
|
echo "Starting NixOS bootstrap for host: $HOSTNAME"
|
||||||
|
|
||||||
|
log_to_loki "starting" "Bootstrap starting for $HOSTNAME (branch: $BRANCH)"
|
||||||
|
|
||||||
echo "Waiting for network connectivity..."
|
echo "Waiting for network connectivity..."
|
||||||
|
|
||||||
# Verify we can reach the git server via HTTPS (doesn't respond to ping)
|
# Verify we can reach the git server via HTTPS (doesn't respond to ping)
|
||||||
if ! curl -s --connect-timeout 5 --max-time 10 https://git.t-juice.club >/dev/null 2>&1; then
|
if ! curl -s --connect-timeout 5 --max-time 10 https://git.t-juice.club >/dev/null 2>&1; then
|
||||||
echo "ERROR: Cannot reach git.t-juice.club via HTTPS"
|
echo "ERROR: Cannot reach git.t-juice.club via HTTPS"
|
||||||
echo "Check network configuration and DNS settings"
|
echo "Check network configuration and DNS settings"
|
||||||
|
log_to_loki "failed" "Network check failed - cannot reach git.t-juice.club"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Network connectivity confirmed"
|
echo "Network connectivity confirmed"
|
||||||
|
log_to_loki "network_ok" "Network connectivity confirmed"
|
||||||
|
|
||||||
# Unwrap Vault token and store AppRole credentials (if provided)
|
# Unwrap Vault token and store AppRole credentials (if provided)
|
||||||
if [ -n "''${VAULT_WRAPPED_TOKEN:-}" ]; then
|
if [ -n "''${VAULT_WRAPPED_TOKEN:-}" ]; then
|
||||||
@@ -55,6 +100,7 @@ let
|
|||||||
chmod 600 /var/lib/vault/approle/secret-id
|
chmod 600 /var/lib/vault/approle/secret-id
|
||||||
|
|
||||||
echo "Vault credentials unwrapped and stored successfully"
|
echo "Vault credentials unwrapped and stored successfully"
|
||||||
|
log_to_loki "vault_ok" "Vault credentials unwrapped and stored"
|
||||||
else
|
else
|
||||||
echo "WARNING: Failed to unwrap Vault token"
|
echo "WARNING: Failed to unwrap Vault token"
|
||||||
if [ -n "$UNWRAP_RESPONSE" ]; then
|
if [ -n "$UNWRAP_RESPONSE" ]; then
|
||||||
@@ -68,17 +114,17 @@ let
|
|||||||
echo "To regenerate token, run: create-host --hostname $HOSTNAME --force"
|
echo "To regenerate token, run: create-host --hostname $HOSTNAME --force"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Vault secrets will not be available, but continuing bootstrap..."
|
echo "Vault secrets will not be available, but continuing bootstrap..."
|
||||||
|
log_to_loki "vault_warn" "Failed to unwrap Vault token - continuing without secrets"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "No Vault wrapped token provided (VAULT_WRAPPED_TOKEN not set)"
|
echo "No Vault wrapped token provided (VAULT_WRAPPED_TOKEN not set)"
|
||||||
echo "Skipping Vault credential setup"
|
echo "Skipping Vault credential setup"
|
||||||
|
log_to_loki "vault_skip" "No Vault token provided - skipping credential setup"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Fetching and building NixOS configuration from flake..."
|
echo "Fetching and building NixOS configuration from flake..."
|
||||||
|
|
||||||
# Read git branch from environment, default to master
|
|
||||||
BRANCH="''${NIXOS_FLAKE_BRANCH:-master}"
|
|
||||||
echo "Using git branch: $BRANCH"
|
echo "Using git branch: $BRANCH"
|
||||||
|
log_to_loki "building" "Starting nixos-rebuild boot"
|
||||||
|
|
||||||
# Build and activate the host-specific configuration
|
# Build and activate the host-specific configuration
|
||||||
FLAKE_URL="git+https://git.t-juice.club/torjus/nixos-servers.git?ref=$BRANCH#''${HOSTNAME}"
|
FLAKE_URL="git+https://git.t-juice.club/torjus/nixos-servers.git?ref=$BRANCH#''${HOSTNAME}"
|
||||||
@@ -86,12 +132,14 @@ let
|
|||||||
if nixos-rebuild boot --flake "$FLAKE_URL"; then
|
if nixos-rebuild boot --flake "$FLAKE_URL"; then
|
||||||
echo "Successfully built configuration for $HOSTNAME"
|
echo "Successfully built configuration for $HOSTNAME"
|
||||||
echo "Rebooting into new configuration..."
|
echo "Rebooting into new configuration..."
|
||||||
|
log_to_loki "success" "Build successful - rebooting into new configuration"
|
||||||
sleep 2
|
sleep 2
|
||||||
systemctl reboot
|
systemctl reboot
|
||||||
else
|
else
|
||||||
echo "ERROR: nixos-rebuild failed for $HOSTNAME"
|
echo "ERROR: nixos-rebuild failed for $HOSTNAME"
|
||||||
echo "Check that flake has configuration for this hostname"
|
echo "Check that flake has configuration for this hostname"
|
||||||
echo "Manual intervention required - system will not reboot"
|
echo "Manual intervention required - system will not reboot"
|
||||||
|
log_to_loki "failed" "nixos-rebuild failed - manual intervention required"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
'';
|
'';
|
||||||
|
|||||||
Reference in New Issue
Block a user