labmon/stepmon/stepmon.go

168 lines
4.3 KiB
Go

package stepmon
import (
"context"
"crypto/x509"
"encoding/json"
"encoding/pem"
"fmt"
"io"
"log/slog"
"net/http"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
var stepCertTimeLeft = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "labmon",
Subsystem: "stepmon",
Name: "certificate_seconds_left",
Help: "Seconds left until the certificate expires.",
}, []string{"cert_id"})
var stepCertError = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "labmon",
Subsystem: "stepmon",
Name: "certificate_check_error",
Help: "Error checking the certificate.",
}, []string{"cert_id"})
var stepCertLifetime = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "labmon",
Subsystem: "stepmon",
Name: "certificate_lifetime_seconds",
Help: "How long the certificate is valid in seconds.",
}, []string{"cert_id"})
// StepMonitor tracks a single root certificate served over HTTP and exposes
// its state through the package's Prometheus gauges.
type StepMonitor struct {
	// BaseURL is the URL prefix of the certificate endpoint; the certificate is
	// requested from BaseURL + "/root/" + RootID.
	BaseURL string
	// RootID identifies the root certificate. It is used in the request path,
	// as the "cert_id" metric label value, and as a logger attribute.
	RootID string

	// logger receives the monitor's log records.
	logger *slog.Logger
	// certificate is the most recently fetched certificate; it stays nil until
	// a fetch has succeeded.
	certificate *x509.Certificate

	// shutdownCh carries the stop request to the monitoring loop.
	shutdownCh chan struct{}
	// shutdownComplete carries the loop's acknowledgement that it has stopped.
	shutdownComplete chan struct{}
}
// NewStepMonitor builds a StepMonitor for the root certificate rootID served
// under baseURL.
//
// The returned monitor discards all log output until SetLogger is called. Its
// stop channel is unbuffered and its completion channel has room for one
// acknowledgement.
func NewStepMonitor(baseURL string, rootID string) *StepMonitor {
	sm := new(StepMonitor)

	sm.BaseURL = baseURL
	sm.RootID = rootID

	// Default to a logger that writes nowhere.
	discard := slog.NewTextHandler(io.Discard, nil)
	sm.logger = slog.New(discard)

	sm.shutdownCh = make(chan struct{})
	sm.shutdownComplete = make(chan struct{}, 1)

	return sm
}
// SetLogger replaces the monitor's logger with one derived from logger that
// adds the attributes component="stepmon" and root_id=<sm.RootID> to every
// record.
func (sm *StepMonitor) SetLogger(logger *slog.Logger) {
	tagged := logger.With(
		"component", "stepmon",
		"root_id", sm.RootID,
	)
	sm.logger = tagged
}
// Start runs the monitoring loop and blocks until a stop request arrives on
// the monitor's shutdown channel (see Shutdown).
//
// The certificate is fetched once immediately and then again five minutes
// after each fetch attempt completes. Every attempt updates stepCertError
// (1 on failure, 0 on success); a successful attempt also updates
// stepCertLifetime. Once per second stepCertTimeLeft is refreshed from the most
// recently fetched certificate.
//
// A failed fetch never stops the loop: the previously fetched certificate, if
// any, keeps being reported and the fetch is retried on the next interval.
func (sm *StepMonitor) Start() {
	const (
		fetchInterval  = 5 * time.Minute
		updateInterval = 1 * time.Second
	)

	sm.logger.Info("Starting monitoring")

	// Initial fetch. If it fails sm.certificate is still nil, so the time-left
	// update below must tolerate a missing certificate instead of
	// dereferencing it.
	sm.refreshCertificate()
	sm.publishTimeLeft()

	timerCertFetch := time.NewTimer(fetchInterval)
	defer timerCertFetch.Stop()
	timerUpdateMonitor := time.NewTimer(updateInterval)
	defer timerUpdateMonitor.Stop()

	for {
		select {
		case <-timerCertFetch.C:
			sm.refreshCertificate()
			// Re-arm only after the attempt has finished so the interval is
			// measured from the end of one fetch to the start of the next.
			timerCertFetch.Reset(fetchInterval)
		case <-timerUpdateMonitor.C:
			sm.publishTimeLeft()
			timerUpdateMonitor.Reset(updateInterval)
		case <-sm.shutdownCh:
			// Acknowledge the stop request before returning.
			sm.shutdownComplete <- struct{}{}
			return
		}
	}
}

// refreshCertificate fetches the certificate and records the outcome in
// stepCertError, plus the validity period in stepCertLifetime on success.
func (sm *StepMonitor) refreshCertificate() {
	if err := sm.fetchCert(); err != nil {
		stepCertError.WithLabelValues(sm.RootID).Set(1)
		return
	}
	stepCertError.WithLabelValues(sm.RootID).Set(0)

	lifetime := sm.certificate.NotAfter.Sub(sm.certificate.NotBefore).Seconds()
	stepCertLifetime.WithLabelValues(sm.RootID).Set(lifetime)
}

// publishTimeLeft sets stepCertTimeLeft from the current certificate; it does
// nothing while no certificate has been fetched yet.
func (sm *StepMonitor) publishTimeLeft() {
	if sm.certificate == nil {
		return
	}
	secondsLeft := time.Until(sm.certificate.NotAfter).Seconds()
	stepCertTimeLeft.WithLabelValues(sm.RootID).Set(secondsLeft)
}
// Shutdown asks the monitoring loop started by Start to stop and waits for
// its acknowledgement. It always returns nil.
//
// shutdownCh is unbuffered, so the send below blocks until the Start loop
// receives it; calling Shutdown while no Start loop is running therefore
// blocks. Both channels are closed here, so a second call panics on the send
// to the already-closed shutdownCh.
func (sm *StepMonitor) Shutdown() error {
	// Hand the stop request to the Start loop.
	sm.shutdownCh <- struct{}{}
	close(sm.shutdownCh)
	// Wait for the loop to confirm it is returning.
	<-sm.shutdownComplete
	close(sm.shutdownComplete)
	sm.logger.Info("Monitoring stopped")
	return nil
}
// fetchCert downloads the root certificate from BaseURL + "/root/" + RootID
// and, on success, stores the parsed certificate in sm.certificate.
//
// The endpoint must answer 200 OK with a JSON object whose "ca" field holds a
// PEM-encoded block of type CERTIFICATE. The whole request is bounded by a
// ten-second timeout. Every failure is logged and returned, and leaves
// sm.certificate unchanged.
func (sm *StepMonitor) fetchCert() error {
	sm.logger.Debug("Fetching certificate")

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	reqURL := fmt.Sprintf("%s/root/%s", sm.BaseURL, sm.RootID)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
	if err != nil {
		sm.logger.Error("Failed to create request", "error", err)
		return err
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		sm.logger.Error("Failed to fetch certificate", "error", err)
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		sm.logger.Error("Failed to fetch certificate", "status", resp.Status)
		return fmt.Errorf("failed to fetch certificate: %s", resp.Status)
	}

	var responseBody struct {
		CA string `json:"ca"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&responseBody); err != nil {
		sm.logger.Error("Failed to decode response", "error", err)
		return err
	}

	// pem.Decode returns a nil block when the input holds no PEM data (for
	// example an empty or missing "ca" field); reject that here instead of
	// dereferencing the nil block.
	block, _ := pem.Decode([]byte(responseBody.CA))
	if block == nil {
		sm.logger.Error("No PEM data found in response")
		return fmt.Errorf("no PEM data found in response")
	}
	if block.Type != "CERTIFICATE" {
		sm.logger.Error("Invalid certificate type", "type", block.Type)
		return fmt.Errorf("invalid certificate type: %s", block.Type)
	}

	cert, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		sm.logger.Error("Failed to parse certificate", "error", err)
		return err
	}

	sm.logger.Debug("Successfully fetched certificate", "not_after", cert.NotAfter)
	sm.certificate = cert
	return nil
}