Add tracing to tlsconmon
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package tlsconmon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"encoding/pem"
|
||||
@@ -12,21 +13,33 @@ import (
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// gaugeCertTimeLeft is a Prometheus gauge vector, labelled by "address",
// reporting the seconds left until the monitored certificate expires.
// Start sets it to 0 for the address when the initial certificate fetch fails.
var gaugeCertTimeLeft = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "labmon",
|
||||
Subsystem: "tlsconmon",
|
||||
Name: "certificate_seconds_left",
|
||||
Help: "Seconds left until the certificate expires.",
|
||||
}, []string{"address"})
|
||||
// name is the instrumentation scope name passed to otel.Tracer to obtain
// this package's tracer.
const name = "git.t-juice.com/labmon/tlsconmon"
|
||||
|
||||
// gaugeCertError is a Prometheus gauge vector, labelled by "address", that
// flags certificate check failures: Start sets it to 1 when fetchCert returns
// an error and to 0 when a fetch succeeds.
var gaugeCertError = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "labmon",
|
||||
Subsystem: "tlsconmon",
|
||||
Name: "certificate_check_error",
|
||||
Help: "Error checking the certificate.",
|
||||
}, []string{"address"})
|
||||
// Package-level instrumentation: Prometheus gauges and the OpenTelemetry tracer.
var (
|
||||
// Prometheus metrics
|
||||
// gaugeCertTimeLeft reports, per "address" label, the seconds left until
// the monitored certificate expires. Start sets it to 0 when the initial
// certificate fetch fails.
gaugeCertTimeLeft = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "labmon",
|
||||
Subsystem: "tlsconmon",
|
||||
Name: "certificate_seconds_left",
|
||||
Help: "Seconds left until the certificate expires.",
|
||||
}, []string{"address"})
|
||||
|
||||
// gaugeCertError flags, per "address" label, whether the last certificate
// check failed: Start sets it to 1 when fetchCert returns an error and to 0
// when a fetch succeeds.
gaugeCertError = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "labmon",
|
||||
Subsystem: "tlsconmon",
|
||||
Name: "certificate_check_error",
|
||||
Help: "Error checking the certificate.",
|
||||
}, []string{"address"})
|
||||
|
||||
// OTEL tracing
|
||||
// tracer is obtained from otel.Tracer using the package's `name` constant;
// fetchCert uses it to start the "fetch_cert" and "dial_tls" spans.
tracer = otel.Tracer(name)
|
||||
)
|
||||
|
||||
type TLSConnectionMonitor struct {
|
||||
Address string
|
||||
@@ -41,6 +54,9 @@ type TLSConnectionMonitor struct {
|
||||
cert *x509.Certificate
|
||||
}
|
||||
|
||||
// init has an empty body and performs no package initialization.
// NOTE(review): looks like a leftover placeholder from adding tracing —
// confirm it is not needed and consider removing it.
func init() {
|
||||
}
|
||||
|
||||
func NewTLSConnectionMonitor(address string, verify bool, extraCAPaths []string, duration time.Duration) (*TLSConnectionMonitor, error) {
|
||||
var extraCAs []*x509.Certificate
|
||||
for _, path := range extraCAPaths {
|
||||
@@ -84,8 +100,8 @@ func (tm *TLSConnectionMonitor) SetLogger(logger *slog.Logger) {
|
||||
tm.logger = logger.With("component", "tlsconmon", "address", tm.Address)
|
||||
}
|
||||
|
||||
func (tm *TLSConnectionMonitor) Start() {
|
||||
if err := tm.fetchCert(); err != nil {
|
||||
func (tm *TLSConnectionMonitor) Start(ctx context.Context) {
|
||||
if err := tm.fetchCert(ctx); err != nil {
|
||||
gaugeCertError.WithLabelValues(tm.Address).Set(1)
|
||||
gaugeCertTimeLeft.WithLabelValues(tm.Address).Set(0)
|
||||
} else {
|
||||
@@ -102,7 +118,7 @@ func (tm *TLSConnectionMonitor) Start() {
|
||||
for {
|
||||
select {
|
||||
case <-timerCertFetch.C:
|
||||
if err := tm.fetchCert(); err != nil {
|
||||
if err := tm.fetchCert(ctx); err != nil {
|
||||
gaugeCertError.WithLabelValues(tm.Address).Set(1)
|
||||
} else {
|
||||
gaugeCertError.WithLabelValues(tm.Address).Set(0)
|
||||
@@ -127,14 +143,22 @@ func (tm *TLSConnectionMonitor) Shutdown() {
|
||||
close(tm.shutdownComplete)
|
||||
}
|
||||
|
||||
func (tm *TLSConnectionMonitor) fetchCert() error {
|
||||
func (tm *TLSConnectionMonitor) fetchCert(ctx context.Context) error {
|
||||
ctx, span := tracer.Start(ctx, "fetch_cert")
|
||||
defer span.End()
|
||||
|
||||
span.SetAttributes(attribute.String("cert_address", tm.Address))
|
||||
|
||||
span.AddEvent("load_system_cert_pool")
|
||||
pool, err := x509.SystemCertPool()
|
||||
if err != nil {
|
||||
tm.logger.Error("Failed to load system cert pool", "error", err)
|
||||
span.SetStatus(codes.Error, "Failed to fetch certificate")
|
||||
return fmt.Errorf("failed to load system cert pool: %w", err)
|
||||
}
|
||||
|
||||
for _, cert := range tm.extraCAs {
|
||||
span.AddEvent("add_extra_ca", trace.WithAttributes(attribute.String("ca_cn", cert.Subject.CommonName)))
|
||||
pool.AddCert(cert)
|
||||
}
|
||||
|
||||
@@ -146,15 +170,21 @@ func (tm *TLSConnectionMonitor) fetchCert() error {
|
||||
tlsConf.RootCAs = pool
|
||||
}
|
||||
|
||||
_, dialSpan := tracer.Start(ctx, "dial_tls")
|
||||
defer dialSpan.End()
|
||||
conn, err := tls.Dial("tcp", tm.Address, tlsConf)
|
||||
if err != nil {
|
||||
tm.logger.Error("Failed to connect to TLS server", "error", err)
|
||||
dialSpan.SetStatus(codes.Error, "Failed to fetch certificate")
|
||||
return fmt.Errorf("failed to connect to TLS server: %w", err)
|
||||
}
|
||||
defer conn.Close()
|
||||
dialSpan.SetStatus(codes.Ok, "Fetched certificate successfully")
|
||||
dialSpan.End()
|
||||
|
||||
tm.cert = conn.ConnectionState().PeerCertificates[0]
|
||||
tm.logger.Info("Fetched certificate", "not_after", tm.cert.NotAfter, "subject", tm.cert.Subject)
|
||||
|
||||
span.SetStatus(codes.Ok, "Certificate fetched successfully")
|
||||
return nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user