diff --git a/config/config.go b/config/config.go new file mode 100644 index 0000000..126ed80 --- /dev/null +++ b/config/config.go @@ -0,0 +1,34 @@ +package config + +import ( + "os" + + "github.com/pelletier/go-toml/v2" +) + +type StepMonitor struct { + Enabled bool `toml:"Enabled"` + BaseURL string `toml:"BaseURL"` + RootID string `toml:"RootID"` +} + +type Config struct { + ListenAddr string `toml:"ListenAddr"` + StepMonitors []StepMonitor `toml:"StepMonitors"` +} + +func FromFile(file string) (*Config, error) { + var config Config + + f, err := os.Open(file) + if err != nil { + return nil, err + } + decoder := toml.NewDecoder(f) + + if err := decoder.Decode(&config); err != nil { + return nil, err + } + + return &config, nil +} diff --git a/flake.nix b/flake.nix index 54f99c4..f20c1a1 100644 --- a/flake.nix +++ b/flake.nix @@ -48,7 +48,7 @@ version = version; pname = "labmon"; src = src; - vendorHash = pkgs.lib.fakeHash; + vendorHash = "sha256-l94MnEsZ/HXpRhgtb0ckTUXVrLULUeAbXezXlKnYGQk="; }; } ); diff --git a/go.mod b/go.mod index 07522bc..ebe7c91 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,19 @@ module git.t-juice.club/torjus/labmon go 1.24.3 + +require ( + github.com/pelletier/go-toml/v2 v2.2.4 + github.com/prometheus/client_golang v1.22.0 +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.62.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + golang.org/x/sys v0.30.0 // indirect + google.golang.org/protobuf v1.36.5 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..33f8da8 --- /dev/null +++ b/go.sum @@ -0,0 +1,34 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= +github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= +github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= +google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/labmon.toml b/labmon.toml new file mode 100644 index 0000000..ed74e3f --- /dev/null +++ b/labmon.toml @@ -0,0 +1,9 @@ +# Endpoint for the metrics server +ListenAddr = ":9969" + +# Monitor step-ca root certificate +[[StepMonitors]] +Enabled = true +BaseURL = "https://ca.home.2rjus.net" +RootID = "3381bda8015a86b9a3cd1851439d1091890a79005e0f1f7c4301fe4bccc29d80" + diff --git a/main.go b/main.go index 186ca41..f38e813 100644 --- a/main.go +++ b/main.go @@ -1,9 +1,96 @@ package main -import "fmt" +import ( + "context" + "fmt" + "log/slog" + "net/http" + "os" + "os/signal" + + "git.t-juice.club/torjus/labmon/config" + "git.t-juice.club/torjus/labmon/stepmon" + "github.com/prometheus/client_golang/prometheus/promhttp" +) const Version = "0.1.0" -func main() { - fmt.Println("Hello, World!") +func LoadConfig() (*config.Config, error) { + config, err := config.FromFile("labmon.toml") + if err != nil { + return nil, err + } + + return config, nil +} + +func main() { + // Load config + config, err := LoadConfig() + if err != nil { + fmt.Printf("Error loading config: %v\n", err) + os.Exit(1) + } + + // Setup logger + logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) + + // Setup stepmons + var stepmons []*stepmon.StepMonitor + for _, s := range config.StepMonitors { + if s.Enabled { + sm := stepmon.NewStepMonitor(s.BaseURL, s.RootID) + sm.SetLogger(logger) + stepmons = append(stepmons, sm) + } + } + + // Start stepmons + for _, sm := range stepmons { + go func(sm *stepmon.StepMonitor) { + sm.Start() + }(sm) + } + + // Setup graceful shutdown + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) + defer cancel() + + shutdownDone := make(chan struct{}, 1) + + // Start http server + srv := &http.Server{} + srv.Addr = config.ListenAddr + + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.Handler()) + srv.Handler = mux + // Start http server + go func() { + logger.Info("Starting HTTP server", "addr", config.ListenAddr) + if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { + logger.Error("HTTP server error", "error", err) + } + }() + + // Wait for shutdown signal + go func() { + <-ctx.Done() + logger.Debug("Shutdown signal received") + // Shutdown metrics server + srv.Shutdown(context.Background()) + logger.Debug("HTTP server shutdown complete") + + // Shutdown stepmons + for _, sm := range stepmons { + sm.Shutdown() + logger.Debug("StepMonitor shutdown complete", "root_id", sm.RootID) + } + shutdownDone <- struct{}{} + }() + + <-shutdownDone + logger.Info("Shutdown complete") } diff --git a/stepmon/stepmon.go b/stepmon/stepmon.go new file mode 100644 index 0000000..ca564c0 --- /dev/null +++ b/stepmon/stepmon.go @@ -0,0 +1,155 @@ +package stepmon + +import ( + "context" + "crypto/x509" + "encoding/json" + "encoding/pem" + "fmt" + "io" + "log/slog" + "net/http" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var stepCertTimeLeft = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "labmon", + Subsystem: "stepmon", + Name: "certificate_seconds_left", + Help: "Seconds left until the certificate expires.", +}, []string{"cert_id"}) + +var stepCertError = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "labmon", + Subsystem: "stepmon", + Name: "certificate_check_error", + Help: "Error checking the certificate.", +}, []string{"cert_id"}) + +type StepMonitor struct { + BaseURL string + RootID string + logger *slog.Logger + + certificate *x509.Certificate + shutdownCh chan struct{} + shutdownComplete chan struct{} +} + +func NewStepMonitor(baseURL string, rootID string) *StepMonitor { + return &StepMonitor{ + BaseURL: baseURL, + RootID: rootID, + logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + + shutdownCh: make(chan struct{}), + shutdownComplete: make(chan struct{}, 1), + } +} + +func (sm *StepMonitor) SetLogger(logger *slog.Logger) { + sm.logger = logger.With("component", "stepmon", "root_id", sm.RootID) +} + +func (sm *StepMonitor) Start() { + sm.logger.Info("Starting monitoring") + err := sm.fetchCert() + if err != nil { + stepCertError.WithLabelValues(sm.RootID).Set(1) + } else { + stepCertError.WithLabelValues(sm.RootID).Set(0) + } + + timerCertFetch := time.NewTimer(5 * time.Minute) + defer timerCertFetch.Stop() + timerUpdateMonitor := time.NewTimer(1 * time.Second) + defer timerUpdateMonitor.Stop() + + for { + select { + case <-timerCertFetch.C: + if err := sm.fetchCert(); err != nil { + stepCertError.WithLabelValues(sm.RootID).Set(1) + } else { + stepCertError.WithLabelValues(sm.RootID).Set(0) + } + + timerCertFetch.Reset(5 * time.Minute) + + case <-timerUpdateMonitor.C: + if sm.certificate != nil { + secondsLeft := time.Until(sm.certificate.NotAfter).Seconds() + stepCertTimeLeft.WithLabelValues(sm.RootID).Set(secondsLeft) + } + timerUpdateMonitor.Reset(1 * time.Second) + + case <-sm.shutdownCh: + close(sm.shutdownCh) + sm.shutdownComplete <- struct{}{} + return + } + } +} + +func (sm *StepMonitor) Shutdown() error { + sm.shutdownCh <- struct{}{} + <-sm.shutdownComplete + close(sm.shutdownComplete) + sm.logger.Info("Monitoring stopped") + return nil +} + +func (sm *StepMonitor) fetchCert() error { + sm.logger.Debug("Fetching certificate") + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + reqUrl := fmt.Sprintf("%s/root/%s", sm.BaseURL, sm.RootID) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqUrl, nil) + if err != nil { + sm.logger.Error("Failed to create request", "error", err) + return err + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + sm.logger.Error("Failed to fetch certificate", "error", err) + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + sm.logger.Error("Failed to fetch certificate", "status", resp.Status) + return fmt.Errorf("failed to fetch certificate: %s", resp.Status) + } + + var responseBody struct { + CA string `json:"ca"` + } + + decoder := json.NewDecoder(resp.Body) + if err := decoder.Decode(&responseBody); err != nil { + sm.logger.Error("Failed to decode response", "error", err) + return err + } + + block, _ := pem.Decode([]byte(responseBody.CA)) + + if block.Type != "CERTIFICATE" { + sm.logger.Error("Invalid certificate type", "type", block.Type) + return fmt.Errorf("invalid certificate type: %s", block.Type) + } + + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + sm.logger.Error("Failed to parse certificate", "error", err) + return err + } + + sm.logger.Debug("Successfully fetched certificate", "not_after", cert.NotAfter) + + sm.certificate = cert + return nil +}