// Package stepmon periodically fetches a root certificate over HTTP and
// exports its remaining lifetime as Prometheus gauges.
package stepmon

import (
	"context"
	"crypto/x509"
	"encoding/json"
	"encoding/pem"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// stepCertTimeLeft reports, per cert_id, the number of seconds until the most
// recently fetched certificate's NotAfter time.
var stepCertTimeLeft = promauto.NewGaugeVec(prometheus.GaugeOpts{
	Namespace: "labmon",
	Subsystem: "stepmon",
	Name:      "certificate_seconds_left",
	Help:      "Seconds left until the certificate expires.",
}, []string{"cert_id"})

// stepCertError is set, per cert_id, to 1 when the last certificate fetch
// failed and to 0 when it succeeded.
var stepCertError = promauto.NewGaugeVec(prometheus.GaugeOpts{
	Namespace: "labmon",
	Subsystem: "stepmon",
	Name:      "certificate_check_error",
	Help:      "Error checking the certificate.",
}, []string{"cert_id"})

// StepMonitor tracks the expiry of the certificate served at
// "<BaseURL>/root/<RootID>". RootID is also used as the cert_id metric label.
//
// A StepMonitor must be created with NewStepMonitor and must not be copied
// after first use (it contains a sync.Once).
type StepMonitor struct {
	BaseURL string
	RootID  string

	logger      *slog.Logger
	certificate *x509.Certificate // last successfully fetched certificate; nil until the first success

	shutdownOnce     sync.Once     // guards the close of shutdownCh
	shutdownCh       chan struct{} // closed by Shutdown to ask Start to return
	shutdownComplete chan struct{} // closed by Start when it returns
}

// NewStepMonitor returns a monitor for the certificate identified by rootID on
// the server at baseURL. Until SetLogger is called, log output is discarded.
func NewStepMonitor(baseURL string, rootID string) *StepMonitor {
	return &StepMonitor{
		BaseURL:          baseURL,
		RootID:           rootID,
		logger:           slog.New(slog.NewTextHandler(io.Discard, nil)),
		shutdownCh:       make(chan struct{}),
		shutdownComplete: make(chan struct{}),
	}
}

// SetLogger installs logger, tagged with the component name and the monitor's
// RootID. The logger field is not synchronized, so call this before Start.
func (sm *StepMonitor) SetLogger(logger *slog.Logger) {
	sm.logger = logger.With("component", "stepmon", "root_id", sm.RootID)
}

// Start runs the monitoring loop and blocks until Shutdown is called.
//
// It fetches the certificate immediately and then again five minutes after
// each fetch completes, recording success or failure in the error gauge.
// Once a certificate has been fetched, the seconds-left gauge is refreshed
// every second. Start is expected to be called at most once per monitor.
func (sm *StepMonitor) Start() {
	const (
		fetchInterval  = 5 * time.Minute
		updateInterval = 1 * time.Second
	)

	sm.logger.Info("Starting monitoring")

	// Closing (rather than sending on) shutdownComplete releases every
	// Shutdown caller, not only the first one.
	defer close(sm.shutdownComplete)

	// refresh fetches the certificate and mirrors the outcome into the
	// error gauge (1 on failure, 0 on success).
	refresh := func() {
		failed := 0.0
		if err := sm.fetchCert(); err != nil {
			failed = 1
		}
		stepCertError.WithLabelValues(sm.RootID).Set(failed)
	}
	refresh()

	timerCertFetch := time.NewTimer(fetchInterval)
	defer timerCertFetch.Stop()
	timerUpdateMonitor := time.NewTimer(updateInterval)
	defer timerUpdateMonitor.Stop()

	for {
		select {
		case <-timerCertFetch.C:
			refresh()
			timerCertFetch.Reset(fetchInterval)
		case <-timerUpdateMonitor.C:
			if sm.certificate != nil {
				secondsLeft := time.Until(sm.certificate.NotAfter).Seconds()
				stepCertTimeLeft.WithLabelValues(sm.RootID).Set(secondsLeft)
			}
			timerUpdateMonitor.Reset(updateInterval)
		case <-sm.shutdownCh:
			return
		}
	}
}

// Shutdown asks the loop started by Start to stop and waits until it has
// returned. It is safe to call more than once and from multiple goroutines.
// It always returns nil.
func (sm *StepMonitor) Shutdown() error {
	// The requesting side owns shutdownCh and closes it exactly once; a
	// repeated call must neither send on nor re-close a closed channel.
	sm.shutdownOnce.Do(func() { close(sm.shutdownCh) })
	<-sm.shutdownComplete
	sm.logger.Info("Monitoring stopped")
	return nil
}

// fetchCert downloads "<BaseURL>/root/<RootID>", expects a JSON object whose
// "ca" field holds a PEM-encoded certificate, parses it and stores it in
// sm.certificate. The request is bounded by a 10 second timeout.
//
// Every failure is logged here as well as returned, because Start only records
// the outcome in the error gauge. On failure sm.certificate is left unchanged.
func (sm *StepMonitor) fetchCert() error {
	sm.logger.Debug("Fetching certificate")

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	reqURL := fmt.Sprintf("%s/root/%s", sm.BaseURL, sm.RootID)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
	if err != nil {
		sm.logger.Error("Failed to create request", "error", err)
		return err
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		sm.logger.Error("Failed to fetch certificate", "error", err)
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		sm.logger.Error("Failed to fetch certificate", "status", resp.Status)
		return fmt.Errorf("failed to fetch certificate: %s", resp.Status)
	}

	var responseBody struct {
		CA string `json:"ca"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&responseBody); err != nil {
		sm.logger.Error("Failed to decode response", "error", err)
		return err
	}

	// pem.Decode returns a nil block when the input contains no PEM data
	// (for example a missing or empty "ca" field); guard before touching it.
	block, _ := pem.Decode([]byte(responseBody.CA))
	if block == nil {
		sm.logger.Error("No PEM data found in response")
		return fmt.Errorf("no PEM data found in response")
	}
	if block.Type != "CERTIFICATE" {
		sm.logger.Error("Invalid certificate type", "type", block.Type)
		return fmt.Errorf("invalid certificate type: %s", block.Type)
	}

	cert, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		sm.logger.Error("Failed to parse certificate", "error", err)
		return err
	}

	sm.logger.Debug("Successfully fetched certificate", "not_after", cert.NotAfter)
	sm.certificate = cert
	return nil
}