From 4a31b9fc4b00d528995447f1ad1997bf56f745cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sat, 24 May 2025 02:11:35 +0200 Subject: [PATCH] Add tlsconmon component --- config/config.go | 13 +++- labmon.toml | 5 ++ main.go | 35 +++++++++ stepmon/stepmon.go | 5 +- tlsconmon/tlsconmon.go | 160 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 215 insertions(+), 3 deletions(-) create mode 100644 tlsconmon/tlsconmon.go diff --git a/config/config.go b/config/config.go index 126ed80..4686fbb 100644 --- a/config/config.go +++ b/config/config.go @@ -12,9 +12,18 @@ type StepMonitor struct { RootID string `toml:"RootID"` } +type TLSConnectionMonitor struct { + Enabled bool `toml:"Enabled"` + Address string `toml:"Address"` + Verify bool `toml:"Verify"` + ExtraCAPaths []string `toml:"ExtraCAPaths"` + Duration string `toml:"Duration"` +} + type Config struct { - ListenAddr string `toml:"ListenAddr"` - StepMonitors []StepMonitor `toml:"StepMonitors"` + ListenAddr string `toml:"ListenAddr"` + StepMonitors []StepMonitor `toml:"StepMonitors"` + TLSConnectionMonitors []TLSConnectionMonitor `toml:"TLSConnectionMonitors"` } func FromFile(file string) (*Config, error) { diff --git a/labmon.toml b/labmon.toml index ed74e3f..f979021 100644 --- a/labmon.toml +++ b/labmon.toml @@ -7,3 +7,8 @@ Enabled = true BaseURL = "https://ca.home.2rjus.net" RootID = "3381bda8015a86b9a3cd1851439d1091890a79005e0f1f7c4301fe4bccc29d80" +[[TLSConnectionMonitors]] +Enabled = true +Address = "ca.home.2rjus.net:443" +Verify = true +Duration = "1h" diff --git a/main.go b/main.go index f38e813..df88289 100644 --- a/main.go +++ b/main.go @@ -7,9 +7,11 @@ import ( "net/http" "os" "os/signal" + "time" "git.t-juice.club/torjus/labmon/config" "git.t-juice.club/torjus/labmon/stepmon" + "git.t-juice.club/torjus/labmon/tlsconmon" "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -54,6 +56,33 @@ func main() { }(sm) } + // Setup tlsconmons + var tlsconmons 
[]*tlsconmon.TLSConnectionMonitor + for _, t := range config.TLSConnectionMonitors { + if t.Enabled { + duration, err := time.ParseDuration(t.Duration) + if err != nil { + logger.Error("Failed to parse duration", "duration", t.Duration, "error", err) + os.Exit(1) + } + tm, err := tlsconmon.NewTLSConnectionMonitor(t.Address, t.Verify, t.ExtraCAPaths, duration) + if err != nil { + logger.Error("Failed to create TLSConnectionMonitor", "address", t.Address, "error", err) + os.Exit(1) + } + tm.SetLogger(logger) + + tlsconmons = append(tlsconmons, tm) + } + } + + // Start tlsconmons + for _, tm := range tlsconmons { + go func(tm *tlsconmon.TLSConnectionMonitor) { + tm.Start() + }(tm) + } + // Setup graceful shutdown ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) defer cancel() @@ -89,6 +118,12 @@ func main() { logger.Debug("StepMonitor shutdown complete", "root_id", sm.RootID) } shutdownDone <- struct{}{} + + // Shutdown tlsconmons + for _, tm := range tlsconmons { + tm.Shutdown() + logger.Debug("TLSConnectionMonitor shutdown complete", "address", tm.Address) + } }() <-shutdownDone diff --git a/stepmon/stepmon.go b/stepmon/stepmon.go index ca564c0..1b62aae 100644 --- a/stepmon/stepmon.go +++ b/stepmon/stepmon.go @@ -62,6 +62,8 @@ func (sm *StepMonitor) Start() { } else { stepCertError.WithLabelValues(sm.RootID).Set(0) } + secondsLeft := time.Until(sm.certificate.NotAfter).Seconds() + stepCertTimeLeft.WithLabelValues(sm.RootID).Set(secondsLeft) timerCertFetch := time.NewTimer(5 * time.Minute) defer timerCertFetch.Stop() @@ -87,7 +89,6 @@ func (sm *StepMonitor) Start() { timerUpdateMonitor.Reset(1 * time.Second) case <-sm.shutdownCh: - close(sm.shutdownCh) sm.shutdownComplete <- struct{}{} return } @@ -96,6 +97,8 @@ func (sm *StepMonitor) Start() { func (sm *StepMonitor) Shutdown() error { sm.shutdownCh <- struct{}{} + close(sm.shutdownCh) + <-sm.shutdownComplete close(sm.shutdownComplete) sm.logger.Info("Monitoring stopped") diff --git 
a/tlsconmon/tlsconmon.go b/tlsconmon/tlsconmon.go
new file mode 100644
index 0000000..2fdaeca
--- /dev/null
+++ b/tlsconmon/tlsconmon.go
@@ -0,0 +1,237 @@
+// Package tlsconmon monitors the certificate presented by a TLS endpoint and
+// exports its remaining lifetime as Prometheus metrics.
+package tlsconmon
+
+import (
+	"crypto/tls"
+	"crypto/x509"
+	"encoding/pem"
+	"errors"
+	"fmt"
+	"io"
+	"log/slog"
+	"net"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+const (
+	// dialTimeout bounds the TCP connect plus TLS handshake of a single check
+	// so an unresponsive endpoint cannot stall the monitor loop.
+	dialTimeout = 10 * time.Second
+
+	// updateInterval is how often the seconds-left gauge is recomputed.
+	updateInterval = 1 * time.Second
+)
+
+// gaugeCertTimeLeft holds the seconds until the last fetched certificate
+// expires, labelled by monitored address.
+var gaugeCertTimeLeft = promauto.NewGaugeVec(prometheus.GaugeOpts{
+	Namespace: "labmon",
+	Subsystem: "tlsconmon",
+	Name:      "certificate_seconds_left",
+	Help:      "Seconds left until the certificate expires.",
+}, []string{"address"})
+
+// gaugeCertError is set to 1 when the most recent certificate fetch failed
+// and to 0 when it succeeded, labelled by monitored address.
+var gaugeCertError = promauto.NewGaugeVec(prometheus.GaugeOpts{
+	Namespace: "labmon",
+	Subsystem: "tlsconmon",
+	Name:      "certificate_check_error",
+	Help:      "Error checking the certificate.",
+}, []string{"address"})
+
+// TLSConnectionMonitor periodically connects to a TLS endpoint, keeps the
+// leaf certificate it presents and publishes that certificate's remaining
+// lifetime as a gauge.
+type TLSConnectionMonitor struct {
+	// Address is the network address passed to the TLS dialer on every check.
+	Address string
+
+	// Verify controls whether the peer certificate chain is verified.
+	Verify bool
+
+	// CheckDuration is the delay between certificate fetches.
+	CheckDuration time.Duration
+
+	// extraCAs are trusted in addition to the system roots.
+	extraCAs []*x509.Certificate
+
+	logger *slog.Logger
+
+	shutdownOnce     sync.Once
+	shutdownCh       chan struct{}
+	shutdownComplete chan struct{}
+
+	// cert is the most recently fetched leaf certificate; nil until the first
+	// successful fetch. It is only accessed from the goroutine running Start.
+	cert *x509.Certificate
+}
+
+// NewTLSConnectionMonitor returns a monitor that checks the certificate
+// served at address every duration. Each file in extraCAPaths must start with
+// a PEM "CERTIFICATE" block; those certificates are trusted in addition to
+// the system roots. It returns an error if duration is not positive or if an
+// extra CA file cannot be read or parsed.
+func NewTLSConnectionMonitor(address string, verify bool, extraCAPaths []string, duration time.Duration) (*TLSConnectionMonitor, error) {
+	if duration <= 0 {
+		return nil, fmt.Errorf("invalid check duration %s: must be positive", duration)
+	}
+
+	var extraCAs []*x509.Certificate
+	for _, path := range extraCAPaths {
+		cert, err := loadCACert(path)
+		if err != nil {
+			return nil, err
+		}
+		extraCAs = append(extraCAs, cert)
+	}
+
+	return &TLSConnectionMonitor{
+		Address:       address,
+		Verify:        verify,
+		CheckDuration: duration,
+		extraCAs:      extraCAs,
+
+		logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
+
+		shutdownCh:       make(chan struct{}),
+		shutdownComplete: make(chan struct{}),
+	}, nil
+}
+
+// loadCACert reads the file at path and parses its first PEM block as an
+// X.509 certificate.
+func loadCACert(path string) (*x509.Certificate, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read extra cert file %s: %w", path, err)
+	}
+
+	// pem.Decode returns a nil block when the input contains no PEM data.
+	pemBlock, _ := pem.Decode(data)
+	if pemBlock == nil {
+		return nil, fmt.Errorf("no PEM block found in extra ca file %s", path)
+	}
+	if pemBlock.Type != "CERTIFICATE" {
+		return nil, fmt.Errorf("invalid PEM block type in extra ca file %s: %s", path, pemBlock.Type)
+	}
+
+	cert, err := x509.ParseCertificate(pemBlock.Bytes)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse extra cert file %s: %w", path, err)
+	}
+	return cert, nil
+}
+
+// SetLogger replaces the monitor's logger with logger, annotated with the
+// component name and the monitored address.
+func (tm *TLSConnectionMonitor) SetLogger(logger *slog.Logger) {
+	tm.logger = logger.With("component", "tlsconmon", "address", tm.Address)
+}
+
+// Start fetches the certificate once and then runs the monitor loop,
+// refetching every CheckDuration and refreshing the seconds-left gauge every
+// updateInterval. It blocks until Shutdown is called and must be called at
+// most once per monitor.
+func (tm *TLSConnectionMonitor) Start() {
+	// Closing shutdownComplete (instead of sending on it) lets every Shutdown
+	// call observe that the loop has exited.
+	defer close(tm.shutdownComplete)
+
+	tm.refreshCert()
+	tm.updateTimeLeft()
+
+	fetchTimer := time.NewTimer(tm.CheckDuration)
+	defer fetchTimer.Stop()
+	updateTicker := time.NewTicker(updateInterval)
+	defer updateTicker.Stop()
+
+	for {
+		select {
+		case <-fetchTimer.C:
+			tm.refreshCert()
+			// Re-arm only after the fetch so a slow endpoint cannot cause
+			// back-to-back checks.
+			fetchTimer.Reset(tm.CheckDuration)
+		case <-updateTicker.C:
+			tm.updateTimeLeft()
+		case <-tm.shutdownCh:
+			return
+		}
+	}
+}
+
+// Shutdown stops the loop run by Start and blocks until it has exited. It
+// may be called more than once.
+func (tm *TLSConnectionMonitor) Shutdown() {
+	tm.shutdownOnce.Do(func() { close(tm.shutdownCh) })
+	<-tm.shutdownComplete
+}
+
+// refreshCert fetches the peer certificate and records the outcome in the
+// error gauge. After a failed fetch the previously fetched certificate, if
+// any, is kept.
+func (tm *TLSConnectionMonitor) refreshCert() {
+	if err := tm.fetchCert(); err != nil {
+		tm.logger.Error("Failed to fetch certificate", "error", err)
+		gaugeCertError.WithLabelValues(tm.Address).Set(1)
+		return
+	}
+	gaugeCertError.WithLabelValues(tm.Address).Set(0)
+}
+
+// updateTimeLeft publishes the seconds until the stored certificate expires,
+// or 0 while no certificate has been fetched yet.
+func (tm *TLSConnectionMonitor) updateTimeLeft() {
+	var secondsLeft float64
+	if tm.cert != nil {
+		secondsLeft = time.Until(tm.cert.NotAfter).Seconds()
+	}
+	gaugeCertTimeLeft.WithLabelValues(tm.Address).Set(secondsLeft)
+}
+
+// fetchCert connects to Address, completes a TLS handshake and stores the
+// first certificate presented by the peer in tm.cert.
+func (tm *TLSConnectionMonitor) fetchCert() error {
+	tlsConf := &tls.Config{
+		// Verification is skipped only when the monitor was created with
+		// verify set to false.
+		InsecureSkipVerify: !tm.Verify,
+	}
+
+	// With RootCAs left nil crypto/tls uses the host's root CA set, so the
+	// system pool only has to be loaded when extra CAs must be added to it.
+	if len(tm.extraCAs) > 0 {
+		pool, err := x509.SystemCertPool()
+		if err != nil {
+			return fmt.Errorf("failed to load system cert pool: %w", err)
+		}
+		for _, cert := range tm.extraCAs {
+			pool.AddCert(cert)
+		}
+		tlsConf.RootCAs = pool
+	}
+
+	dialer := &net.Dialer{Timeout: dialTimeout}
+	conn, err := tls.DialWithDialer(dialer, "tcp", tm.Address, tlsConf)
+	if err != nil {
+		return fmt.Errorf("failed to connect to TLS server: %w", err)
+	}
+	defer conn.Close()
+
+	peerCerts := conn.ConnectionState().PeerCertificates
+	if len(peerCerts) == 0 {
+		return errors.New("TLS server presented no certificate")
+	}
+
+	tm.cert = peerCerts[0]
+	tm.logger.Info("Fetched certificate", "not_after", tm.cert.NotAfter, "subject", tm.cert.Subject)
+
+	return nil
+}