Add an optional Prometheus metrics HTTP endpoint to the listener for monitoring deployment operations.

Includes four metrics:
- homelab_deploy_deployments_total (counter with status/action/error_code)
- homelab_deploy_deployment_duration_seconds (histogram with action/success)
- homelab_deploy_deployment_in_progress (gauge)
- homelab_deploy_info (gauge with hostname/tier/role/version)

New CLI flags: --metrics-enabled, --metrics-addr (default :9972)
New NixOS options: metrics.enable, metrics.address, metrics.openFirewall

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
167 lines
5.1 KiB
Nix
167 lines
5.1 KiB
Nix
{ self }:
|
|
{ config, lib, pkgs, ... }:
|
|
|
|
let
|
|
# Shorthand for the values set under services.homelab-deploy.listener.
cfg = config.services.homelab-deploy.listener;
|
|
|
|
# Command line handed to `homelab-deploy listener`, derived from the module options.
args =
  let
    # Render "--<name> <value>" with the value shell-quoted.
    quoted = name: value: "--${name} ${lib.escapeShellArg value}";

    # Flags that are always present.
    baseFlags = [
      (quoted "hostname" cfg.hostname)
      "--tier ${cfg.tier}"
      (quoted "nats-url" cfg.natsUrl)
      (quoted "nkey-file" cfg.nkeyFile)
      (quoted "flake-url" cfg.flakeUrl)
      "--timeout ${toString cfg.timeout}"
      (quoted "discover-subject" cfg.discoverSubject)
    ];

    # Only emitted when a role has been configured.
    roleFlags = lib.optional (cfg.role != null) (quoted "role" cfg.role);

    # One --deploy-subject flag per configured subject, in order.
    subjectFlags = map (quoted "deploy-subject") cfg.deploySubjects;

    # Metrics flags appear together, and only when metrics are enabled.
    metricsFlags = lib.optionals cfg.metrics.enable [
      "--metrics-enabled"
      (quoted "metrics-addr" cfg.metrics.address)
    ];
  in
  lib.concatStringsSep " " (baseFlags ++ roleFlags ++ subjectFlags ++ metricsFlags);
|
|
|
|
# Extract the TCP port from the metrics address so the firewall rule can open it.
# Nix is lazy, so this is only evaluated where metricsPort is actually used.
metricsPort =
  let
    addr = cfg.metrics.address;
    # Handles ":9972", "0.0.0.0:9972" and "[::1]:9972": the port is whatever
    # follows the last colon.
    portText = lib.last (lib.splitString ":" addr);
    isNumeric = builtins.match "[0-9]+" portText != null;
  in
  if isNumeric then
    lib.toInt portText
  else
    # Fail with a message that names the option instead of a bare toInt error.
    throw "services.homelab-deploy.listener.metrics.address: cannot determine the port of '${addr}' for the firewall rule; expected \"[host]:port\" with a numeric port";
|
|
|
|
in
|
|
{
|
|
options.services.homelab-deploy.listener = {
|
|
# Master switch: the module's `config` section is wrapped in `lib.mkIf cfg.enable`.
enable = lib.mkEnableOption "homelab-deploy listener service";
|
|
|
|
# Package that provides the `homelab-deploy` binary started by the listener unit.
package = lib.mkOption {
  type = lib.types.package;
  # Read the platform from stdenv: `pkgs.system` is a deprecated alias in
  # recent nixpkgs and is unavailable when aliases are disabled.
  default = self.packages.${pkgs.stdenv.hostPlatform.system}.homelab-deploy;
  # Keep generated option docs from rendering a concrete store path.
  defaultText = lib.literalExpression "self.packages.\${pkgs.stdenv.hostPlatform.system}.homelab-deploy";
  description = "The homelab-deploy package to use";
};
|
|
|
|
# Name this listener announces; passed to the binary as --hostname.
hostname = lib.mkOption {
  type = lib.types.str;
  default = config.networking.hostName;
  # The default depends on `config`, so show the expression in generated docs
  # rather than whatever hostname the documentation build evaluates to.
  defaultText = lib.literalExpression "config.networking.hostName";
  description = "Hostname for this listener (used in subject templates)";
};
|
|
|
|
# Required (no default): which deployment tier this host belongs to.
tier = lib.mkOption {
  description = "Deployment tier for this host";
  type = lib.types.enum [
    "test"
    "prod"
  ];
};
|
|
|
|
# Optional role; when left null no --role flag is generated.
role = lib.mkOption {
  description = "Role for role-based deployment targeting";
  type = with lib.types; nullOr str;
  default = null;
};
|
|
|
|
# Required: NATS server URL, passed as --nats-url.
natsUrl = lib.mkOption {
  description = "NATS server URL";
  example = "nats://nats.example.com:4222";
  type = lib.types.str;
};
|
|
|
|
# Required: location of the NKey seed, passed as --nkey-file.
nkeyFile = lib.mkOption {
  description = "Path to NKey seed file for NATS authentication";
  example = "/run/secrets/homelab-deploy-nkey";
  type = lib.types.path;
};
|
|
|
|
# Required: flake URL, passed as --flake-url.
flakeUrl = lib.mkOption {
  description = "Git flake URL for nixos-rebuild";
  example = "git+https://git.example.com/user/nixos-configs.git";
  type = lib.types.str;
};
|
|
|
|
# Deployment timeout in seconds, passed as --timeout.
timeout = lib.mkOption {
  # Reject negative values at evaluation time instead of passing them to the
  # binary; 0 stays allowed so existing configurations keep evaluating.
  type = lib.types.ints.unsigned;
  default = 600;
  description = "Deployment timeout in seconds";
};
|
|
|
|
# Subjects the listener subscribes to; each entry becomes one --deploy-subject flag.
deploySubjects = lib.mkOption {
  description = ''
    List of NATS subjects to subscribe to for deployment requests.
    Template variables: <hostname>, <tier>, <role>
  '';
  type = with lib.types; listOf str;
  default = [
    "deploy.<tier>.<hostname>"
    "deploy.<tier>.all"
    "deploy.<tier>.role.<role>"
  ];
};
|
|
|
|
# Subject used for discovery requests, passed as --discover-subject.
discoverSubject = lib.mkOption {
  description = "NATS subject for host discovery requests";
  type = lib.types.str;
  default = "deploy.discover";
};
|
|
|
|
# Extra environment variables merged into the listener unit's environment.
environment = lib.mkOption {
  description = "Additional environment variables for the service";
  type = with lib.types; attrsOf str;
  default = { };
  example = {
    GIT_SSH_COMMAND = "ssh -i /run/secrets/deploy-key";
  };
};
|
|
|
|
# Prometheus metrics endpoint: on/off switch, listen address, firewall opt-in.
metrics.enable = lib.mkEnableOption "Prometheus metrics endpoint";

# Listen address handed to the binary as --metrics-addr when metrics are enabled.
metrics.address = lib.mkOption {
  description = "Address for Prometheus metrics HTTP server";
  example = "127.0.0.1:9972";
  type = lib.types.str;
  default = ":9972";
};

# Off by default; only takes effect together with metrics.enable.
metrics.openFirewall = lib.mkOption {
  description = "Open firewall for metrics port";
  type = lib.types.bool;
  default = false;
};
|
|
};
|
|
|
|
config = lib.mkIf cfg.enable {
|
|
# systemd unit that runs `homelab-deploy listener` with the flags built in `args`.
systemd.services.homelab-deploy-listener = {
  description = "homelab-deploy listener";
  wantedBy = [ "multi-user.target" ];
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];

  # Prevent self-interruption during nixos-rebuild switch
  # The service will continue running the old version until manually restarted
  stopIfChanged = false;
  restartIfChanged = false;

  # `//` is right-biased, so the XDG_CACHE_HOME below always wins over any
  # value supplied through cfg.environment.
  environment = cfg.environment // {
    # Nix needs a writable cache for git flake fetching
    XDG_CACHE_HOME = "/var/cache/homelab-deploy";
  };

  path = [ pkgs.git config.system.build.nixos-rebuild ];

  serviceConfig = {
    CacheDirectory = "homelab-deploy";
    Type = "simple";
    ExecStart =
      let
        # systemd expands "%" specifiers and "$VAR" references in ExecStart
        # before running the command. Double both characters so user-supplied
        # values (e.g. a flake URL containing "%2F") reach the binary verbatim.
        systemdLiteral = builtins.replaceStrings [ "%" "$" ] [ "%%" "$$" ];
      in
      "${cfg.package}/bin/homelab-deploy listener ${systemdLiteral args}";
    Restart = "always";
    RestartSec = 10;

    # Minimal hardening - nixos-rebuild requires broad system access:
    # - Write access to /nix/store for building
    # - Kernel namespace support for nix sandbox builds
    # - Ability to activate system configurations
    # - Network access for fetching from git/cache
    # Following the approach of nixos auto-upgrade which has no hardening
  };
};
|
|
|
|
# Open the metrics port only when metrics are enabled and the firewall opt-in is set.
networking.firewall.allowedTCPPorts =
  let
    exposeMetrics = cfg.metrics.enable && cfg.metrics.openFirewall;
  in
  lib.mkIf exposeMetrics [ metricsPort ];
|
|
};
|
|
}
|