Add tracing to tlsconmon

This commit is contained in:
Torjus Håkestad 2025-05-25 03:41:04 +02:00
parent ec6eb3c814
commit 804399d32a
Signed by: torjus
SSH Key Fingerprint: SHA256:KjAds8wHfD2mBYK2H815s/+ABcSdcIHUndwHEdSxml4
7 changed files with 222 additions and 30 deletions

View File

@ -23,6 +23,8 @@ type TLSConnectionMonitor struct {
type Config struct { type Config struct {
ListenAddr string `toml:"ListenAddr"` ListenAddr string `toml:"ListenAddr"`
Profiling bool `toml:"Profiling"` Profiling bool `toml:"Profiling"`
Tracing bool `toml:"Tracing"`
TracingEndpoint string `toml:"TracingEndpoint"`
StepMonitors []StepMonitor `toml:"StepMonitors"` StepMonitors []StepMonitor `toml:"StepMonitors"`
TLSConnectionMonitors []TLSConnectionMonitor `toml:"TLSConnectionMonitors"` TLSConnectionMonitors []TLSConnectionMonitor `toml:"TLSConnectionMonitors"`
} }

23
go.mod
View File

@ -5,15 +5,34 @@ go 1.24.3
require ( require (
github.com/pelletier/go-toml/v2 v2.2.4 github.com/pelletier/go-toml/v2 v2.2.4
github.com/prometheus/client_golang v1.22.0 github.com/prometheus/client_golang v1.22.0
go.opentelemetry.io/otel v1.36.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0
go.opentelemetry.io/otel/sdk v1.36.0
go.opentelemetry.io/otel/trace v1.36.0
) )
require ( require (
github.com/beorn7/perks v1.0.1 // indirect github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v5 v5.0.2 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.62.0 // indirect github.com/prometheus/common v0.62.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect
golang.org/x/sys v0.30.0 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect
google.golang.org/protobuf v1.36.5 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 // indirect
go.opentelemetry.io/otel/metric v1.36.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.36.0 // indirect
go.opentelemetry.io/proto/otlp v1.6.0 // indirect
golang.org/x/net v0.40.0 // indirect
golang.org/x/sys v0.33.0 // indirect
golang.org/x/text v0.25.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237 // indirect
google.golang.org/grpc v1.72.1 // indirect
google.golang.org/protobuf v1.36.6 // indirect
) )

51
go.sum
View File

@ -1,11 +1,24 @@
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8=
github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
@ -26,9 +39,39 @@ github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0leargg
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 h1:dNzwXjZKpMpE2JhmO+9HsPl42NIXFIFSUSSs0fiqra0=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0/go.mod h1:90PoxvaEB5n6AOdZvi+yWJQoE95U8Dhhw2bSyRqnTD0=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0 h1:nRVXXvf78e00EwY6Wp0YII8ww2JVWshZ20HfTlE11AM=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0/go.mod h1:r49hO7CgrxY9Voaj3Xe8pANWtr0Oq916d0XAmOoCZAQ=
go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE=
go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs=
go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs=
go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY=
go.opentelemetry.io/otel/sdk/metric v1.36.0 h1:r0ntwwGosWGaa0CrSt8cuNuTcccMXERFwHX4dThiPis=
go.opentelemetry.io/otel/sdk/metric v1.36.0/go.mod h1:qTNOhFDfKRwX0yXOqJYegL5WRaW376QbB7P4Pb0qva4=
go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w=
go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA=
go.opentelemetry.io/proto/otlp v1.6.0 h1:jQjP+AQyTf+Fe7OKj/MfkDrmK4MNVtw2NpXsf9fefDI=
go.opentelemetry.io/proto/otlp v1.6.0/go.mod h1:cicgGehlFuNdgZkcALOCh3VE6K/u2tAjzlRhDwmVpZc=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY=
golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 h1:Kog3KlB4xevJlAcbbbzPfRG0+X9fdoGM+UBRKVz6Wr0=
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237/go.mod h1:ezi0AVyMKDWy5xAncvjLWH7UcLBB5n7y2fQ8MzjJcto=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237 h1:cJfm9zPbe1e873mHJzmQ1nwVEeRDU/T1wXDK2kUSU34=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA=
google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -1,6 +1,8 @@
# Endpoint for the metrics server # Endpoint for the metrics server
ListenAddr = ":9969" ListenAddr = ":9969"
Profiling = true Profiling = true
Tracing = true
TracingEndpoint = "monitoring01.home.2rjus.net:4318"
# Monitor step-ca root certificate # Monitor step-ca root certificate
[[StepMonitors]] [[StepMonitors]]
@ -12,4 +14,9 @@ RootID = "3381bda8015a86b9a3cd1851439d1091890a79005e0f1f7c4301fe4bccc29d80"
Enabled = true Enabled = true
Address = "ca.home.2rjus.net:443" Address = "ca.home.2rjus.net:443"
Verify = true Verify = true
Duration = "1h" Duration = "10s"
[[TLSConnectionMonitors]]
Enabled = true
Address = "jelly.home.2rjus.net:443"
Verify = true
Duration = "10s"

30
main.go
View File

@ -11,6 +11,7 @@ import (
"time" "time"
"git.t-juice.club/torjus/labmon/config" "git.t-juice.club/torjus/labmon/config"
"git.t-juice.club/torjus/labmon/otel"
"git.t-juice.club/torjus/labmon/stepmon" "git.t-juice.club/torjus/labmon/stepmon"
"git.t-juice.club/torjus/labmon/tlsconmon" "git.t-juice.club/torjus/labmon/tlsconmon"
"github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/client_golang/prometheus/promhttp"
@ -44,6 +45,23 @@ func main() {
Level: slog.LevelDebug, Level: slog.LevelDebug,
})) }))
// Setup graceful shutdown
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
defer cancel()
shutdownDone := make(chan struct{}, 1)
// Setup otel
otelShutdown := func(ctx context.Context) error { return nil }
if cfg.Tracing {
var err error
otelShutdown, err = otel.SetupOTEL(ctx, cfg.TracingEndpoint)
if err != nil {
fmt.Printf("Error setting up OpenTelemetry: %v\n", err)
os.Exit(1)
}
}
// Setup stepmons // Setup stepmons
var stepmons []*stepmon.StepMonitor var stepmons []*stepmon.StepMonitor
for _, s := range cfg.StepMonitors { for _, s := range cfg.StepMonitors {
@ -84,16 +102,10 @@ func main() {
// Start tlsconmons // Start tlsconmons
for _, tm := range tlsconmons { for _, tm := range tlsconmons {
go func(tm *tlsconmon.TLSConnectionMonitor) { go func(tm *tlsconmon.TLSConnectionMonitor) {
tm.Start() tm.Start(ctx)
}(tm) }(tm)
} }
// Setup graceful shutdown
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
defer cancel()
shutdownDone := make(chan struct{}, 1)
// Start http server // Start http server
srv := &http.Server{} srv := &http.Server{}
srv.Addr = cfg.ListenAddr srv.Addr = cfg.ListenAddr
@ -137,6 +149,10 @@ func main() {
tm.Shutdown() tm.Shutdown()
logger.Debug("TLSConnectionMonitor shutdown complete", "address", tm.Address) logger.Debug("TLSConnectionMonitor shutdown complete", "address", tm.Address)
} }
if err := otelShutdown(context.Background()); err != nil {
logger.Warn("Error shutting down OpenTelemetry", "error", err)
}
}() }()
<-shutdownDone <-shutdownDone

75
otel/otel.go Normal file
View File

@ -0,0 +1,75 @@
package otel
import (
"context"
"errors"
"time"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.32.0"
)
func SetupOTEL(ctx context.Context, traceEndpoint string) (shutdown func(context.Context) error, err error) {
var shutdownFuncs []func(context.Context) error
shutdown = func(ctx context.Context) error {
var err error
for _, fn := range shutdownFuncs {
err = errors.Join(err, fn(ctx))
}
shutdownFuncs = nil
return err
}
handlerErr := func(inErr error) {
err = errors.Join(inErr, shutdown(ctx))
}
prop := newPropagator()
otel.SetTextMapPropagator(prop)
tracerProvider, err := newTracerProvider(ctx, traceEndpoint)
if err != nil {
handlerErr(err)
return
}
shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown)
otel.SetTracerProvider(tracerProvider)
return
}
// newPropagator returns a composite text-map propagator made up of the
// TraceContext and Baggage propagators, in that order.
func newPropagator() propagation.TextMapPropagator {
	traceContext := propagation.TraceContext{}
	baggage := propagation.Baggage{}
	return propagation.NewCompositeTextMapPropagator(traceContext, baggage)
}
// newTracerProvider builds an SDK tracer provider that exports spans over
// OTLP/HTTP to traceEndpoint.
//
// traceEndpoint is handed to otlptracehttp.WithEndpoint, and the exporter is
// created with otlptracehttp.WithInsecure. Spans are batched with a one-second
// batch timeout and carry a resource with service namespace "t-juice.club"
// and service name "labmon".
//
// It returns a nil provider and the underlying error if either the resource
// or the exporter cannot be created.
func newTracerProvider(ctx context.Context, traceEndpoint string) (*trace.TracerProvider, error) {
	// Build the resource before the exporter: if the resource fails, no
	// exporter has been created yet, so nothing is left without a Shutdown.
	traceResource, err := resource.New(ctx, resource.WithAttributes(
		semconv.ServiceNamespaceKey.String("t-juice.club"),
		semconv.ServiceName("labmon"),
	))
	if err != nil {
		return nil, err
	}

	traceExporter, err := otlptracehttp.New(ctx,
		otlptracehttp.WithEndpoint(traceEndpoint),
		otlptracehttp.WithInsecure(),
	)
	if err != nil {
		return nil, err
	}

	// From here the provider owns the exporter; the provider's Shutdown is
	// what the caller registers for cleanup.
	tracerProvider := trace.NewTracerProvider(
		trace.WithBatcher(traceExporter, trace.WithBatchTimeout(time.Second)),
		trace.WithResource(traceResource),
	)
	return tracerProvider, nil
}

View File

@ -1,6 +1,7 @@
package tlsconmon package tlsconmon
import ( import (
"context"
"crypto/tls" "crypto/tls"
"crypto/x509" "crypto/x509"
"encoding/pem" "encoding/pem"
@ -12,21 +13,33 @@ import (
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promauto"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
) )
var gaugeCertTimeLeft = promauto.NewGaugeVec(prometheus.GaugeOpts{ const name = "git.t-juice.com/labmon/tlsconmon"
var (
// Prometheus metrics
gaugeCertTimeLeft = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "labmon", Namespace: "labmon",
Subsystem: "tlsconmon", Subsystem: "tlsconmon",
Name: "certificate_seconds_left", Name: "certificate_seconds_left",
Help: "Seconds left until the certificate expires.", Help: "Seconds left until the certificate expires.",
}, []string{"address"}) }, []string{"address"})
var gaugeCertError = promauto.NewGaugeVec(prometheus.GaugeOpts{ gaugeCertError = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "labmon", Namespace: "labmon",
Subsystem: "tlsconmon", Subsystem: "tlsconmon",
Name: "certificate_check_error", Name: "certificate_check_error",
Help: "Error checking the certificate.", Help: "Error checking the certificate.",
}, []string{"address"}) }, []string{"address"})
// OTEL tracing
tracer = otel.Tracer(name)
)
type TLSConnectionMonitor struct { type TLSConnectionMonitor struct {
Address string Address string
@ -41,6 +54,9 @@ type TLSConnectionMonitor struct {
cert *x509.Certificate cert *x509.Certificate
} }
// init currently performs no package-level initialization; its body is empty.
// NOTE(review): looks like a leftover placeholder — confirm and consider removing.
func init() {
}
func NewTLSConnectionMonitor(address string, verify bool, extraCAPaths []string, duration time.Duration) (*TLSConnectionMonitor, error) { func NewTLSConnectionMonitor(address string, verify bool, extraCAPaths []string, duration time.Duration) (*TLSConnectionMonitor, error) {
var extraCAs []*x509.Certificate var extraCAs []*x509.Certificate
for _, path := range extraCAPaths { for _, path := range extraCAPaths {
@ -84,8 +100,8 @@ func (tm *TLSConnectionMonitor) SetLogger(logger *slog.Logger) {
tm.logger = logger.With("component", "tlsconmon", "address", tm.Address) tm.logger = logger.With("component", "tlsconmon", "address", tm.Address)
} }
func (tm *TLSConnectionMonitor) Start() { func (tm *TLSConnectionMonitor) Start(ctx context.Context) {
if err := tm.fetchCert(); err != nil { if err := tm.fetchCert(ctx); err != nil {
gaugeCertError.WithLabelValues(tm.Address).Set(1) gaugeCertError.WithLabelValues(tm.Address).Set(1)
gaugeCertTimeLeft.WithLabelValues(tm.Address).Set(0) gaugeCertTimeLeft.WithLabelValues(tm.Address).Set(0)
} else { } else {
@ -102,7 +118,7 @@ func (tm *TLSConnectionMonitor) Start() {
for { for {
select { select {
case <-timerCertFetch.C: case <-timerCertFetch.C:
if err := tm.fetchCert(); err != nil { if err := tm.fetchCert(ctx); err != nil {
gaugeCertError.WithLabelValues(tm.Address).Set(1) gaugeCertError.WithLabelValues(tm.Address).Set(1)
} else { } else {
gaugeCertError.WithLabelValues(tm.Address).Set(0) gaugeCertError.WithLabelValues(tm.Address).Set(0)
@ -127,14 +143,22 @@ func (tm *TLSConnectionMonitor) Shutdown() {
close(tm.shutdownComplete) close(tm.shutdownComplete)
} }
func (tm *TLSConnectionMonitor) fetchCert() error { func (tm *TLSConnectionMonitor) fetchCert(ctx context.Context) error {
ctx, span := tracer.Start(ctx, "fetch_cert")
defer span.End()
span.SetAttributes(attribute.String("cert_address", tm.Address))
span.AddEvent("load_system_cert_pool")
pool, err := x509.SystemCertPool() pool, err := x509.SystemCertPool()
if err != nil { if err != nil {
tm.logger.Error("Failed to load system cert pool", "error", err) tm.logger.Error("Failed to load system cert pool", "error", err)
span.SetStatus(codes.Error, "Failed to fetch certificate")
return fmt.Errorf("failed to load system cert pool: %w", err) return fmt.Errorf("failed to load system cert pool: %w", err)
} }
for _, cert := range tm.extraCAs { for _, cert := range tm.extraCAs {
span.AddEvent("add_extra_ca", trace.WithAttributes(attribute.String("ca_cn", cert.Subject.CommonName)))
pool.AddCert(cert) pool.AddCert(cert)
} }
@ -146,15 +170,21 @@ func (tm *TLSConnectionMonitor) fetchCert() error {
tlsConf.RootCAs = pool tlsConf.RootCAs = pool
} }
_, dialSpan := tracer.Start(ctx, "dial_tls")
defer dialSpan.End()
conn, err := tls.Dial("tcp", tm.Address, tlsConf) conn, err := tls.Dial("tcp", tm.Address, tlsConf)
if err != nil { if err != nil {
tm.logger.Error("Failed to connect to TLS server", "error", err) tm.logger.Error("Failed to connect to TLS server", "error", err)
dialSpan.SetStatus(codes.Error, "Failed to fetch certificate")
return fmt.Errorf("failed to connect to TLS server: %w", err) return fmt.Errorf("failed to connect to TLS server: %w", err)
} }
defer conn.Close() defer conn.Close()
dialSpan.SetStatus(codes.Ok, "Fetched certificate successfully")
dialSpan.End()
tm.cert = conn.ConnectionState().PeerCertificates[0] tm.cert = conn.ConnectionState().PeerCertificates[0]
tm.logger.Info("Fetched certificate", "not_after", tm.cert.NotAfter, "subject", tm.cert.Subject) tm.logger.Info("Fetched certificate", "not_after", tm.cert.NotAfter, "subject", tm.cert.Subject)
span.SetStatus(codes.Ok, "Certificate fetched successfully")
return nil return nil
} }