feat: add Prometheus metrics endpoint and Docker image (PLAN.md 4.2)
Add internal/metrics package with dedicated Prometheus registry exposing SSH connection, auth attempt, session, and build info metrics. Wire into SSH server (4 instrumentation points) and web server (/metrics endpoint). Add dockerImage output to flake.nix via dockerTools.buildLayeredImage. Bump version to 0.7.0. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -21,8 +21,9 @@ type Config struct {
|
||||
}
|
||||
|
||||
type WebConfig struct {
|
||||
Enabled bool `toml:"enabled"`
|
||||
ListenAddr string `toml:"listen_addr"`
|
||||
Enabled bool `toml:"enabled"`
|
||||
ListenAddr string `toml:"listen_addr"`
|
||||
MetricsEnabled *bool `toml:"metrics_enabled"`
|
||||
}
|
||||
|
||||
type ShellConfig struct {
|
||||
@@ -143,6 +144,10 @@ func applyDefaults(cfg *Config) {
|
||||
if cfg.Web.ListenAddr == "" {
|
||||
cfg.Web.ListenAddr = ":8080"
|
||||
}
|
||||
if cfg.Web.MetricsEnabled == nil {
|
||||
t := true
|
||||
cfg.Web.MetricsEnabled = &t
|
||||
}
|
||||
if cfg.Shell.Hostname == "" {
|
||||
cfg.Shell.Hostname = "ubuntu-server"
|
||||
}
|
||||
|
||||
93
internal/metrics/metrics.go
Normal file
93
internal/metrics/metrics.go
Normal file
@@ -0,0 +1,93 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
// Metrics holds all Prometheus collectors for the honeypot.
|
||||
type Metrics struct {
|
||||
registry *prometheus.Registry
|
||||
|
||||
SSHConnectionsTotal *prometheus.CounterVec
|
||||
SSHConnectionsActive prometheus.Gauge
|
||||
AuthAttemptsTotal *prometheus.CounterVec
|
||||
SessionsTotal *prometheus.CounterVec
|
||||
SessionsActive prometheus.Gauge
|
||||
SessionDuration prometheus.Histogram
|
||||
BuildInfo *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// New creates a new Metrics instance with all collectors registered.
|
||||
func New(version string) *Metrics {
|
||||
reg := prometheus.NewRegistry()
|
||||
|
||||
m := &Metrics{
|
||||
registry: reg,
|
||||
SSHConnectionsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "oubliette_ssh_connections_total",
|
||||
Help: "Total SSH connections received.",
|
||||
}, []string{"outcome"}),
|
||||
SSHConnectionsActive: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "oubliette_ssh_connections_active",
|
||||
Help: "Current active SSH connections.",
|
||||
}),
|
||||
AuthAttemptsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "oubliette_auth_attempts_total",
|
||||
Help: "Total authentication attempts.",
|
||||
}, []string{"result", "reason"}),
|
||||
SessionsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "oubliette_sessions_total",
|
||||
Help: "Total sessions created.",
|
||||
}, []string{"shell"}),
|
||||
SessionsActive: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "oubliette_sessions_active",
|
||||
Help: "Current active sessions.",
|
||||
}),
|
||||
SessionDuration: prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Name: "oubliette_session_duration_seconds",
|
||||
Help: "Session duration in seconds.",
|
||||
Buckets: []float64{1, 5, 10, 30, 60, 120, 300, 600, 1800, 3600},
|
||||
}),
|
||||
BuildInfo: prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "oubliette_build_info",
|
||||
Help: "Build information. Always 1.",
|
||||
}, []string{"version"}),
|
||||
}
|
||||
|
||||
reg.MustRegister(
|
||||
collectors.NewGoCollector(),
|
||||
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
|
||||
m.SSHConnectionsTotal,
|
||||
m.SSHConnectionsActive,
|
||||
m.AuthAttemptsTotal,
|
||||
m.SessionsTotal,
|
||||
m.SessionsActive,
|
||||
m.SessionDuration,
|
||||
m.BuildInfo,
|
||||
)
|
||||
|
||||
m.BuildInfo.WithLabelValues(version).Set(1)
|
||||
|
||||
// Initialize label combinations so they appear in Gather/output.
|
||||
for _, outcome := range []string{"accepted", "rejected_handshake", "rejected_max_connections"} {
|
||||
m.SSHConnectionsTotal.WithLabelValues(outcome)
|
||||
}
|
||||
for _, reason := range []string{"static_credential", "remembered_credential", "threshold_reached", "rejected"} {
|
||||
m.AuthAttemptsTotal.WithLabelValues("accepted", reason)
|
||||
m.AuthAttemptsTotal.WithLabelValues("rejected", reason)
|
||||
}
|
||||
for _, shell := range []string{"bash", "fridge", "banking", "adventure"} {
|
||||
m.SessionsTotal.WithLabelValues(shell)
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
// Handler returns an http.Handler that serves Prometheus metrics.
|
||||
func (m *Metrics) Handler() http.Handler {
|
||||
return promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})
|
||||
}
|
||||
62
internal/metrics/metrics_test.go
Normal file
62
internal/metrics/metrics_test.go
Normal file
@@ -0,0 +1,62 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
m := New("1.2.3")
|
||||
|
||||
// Gather all metrics and check expected names exist.
|
||||
families, err := m.registry.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("gather: %v", err)
|
||||
}
|
||||
|
||||
want := map[string]bool{
|
||||
"oubliette_ssh_connections_total": false,
|
||||
"oubliette_ssh_connections_active": false,
|
||||
"oubliette_auth_attempts_total": false,
|
||||
"oubliette_sessions_total": false,
|
||||
"oubliette_sessions_active": false,
|
||||
"oubliette_session_duration_seconds": false,
|
||||
"oubliette_build_info": false,
|
||||
}
|
||||
|
||||
for _, f := range families {
|
||||
if _, ok := want[f.GetName()]; ok {
|
||||
want[f.GetName()] = true
|
||||
}
|
||||
}
|
||||
|
||||
for name, found := range want {
|
||||
if !found {
|
||||
t.Errorf("metric %q not registered", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandler(t *testing.T) {
|
||||
m := New("1.2.3")
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||
w := httptest.NewRecorder()
|
||||
m.Handler().ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200", w.Code)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(w.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("reading body: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(string(body), `oubliette_build_info{version="1.2.3"} 1`) {
|
||||
t.Errorf("response should contain build_info metric, got:\n%s", body)
|
||||
}
|
||||
}
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"git.t-juice.club/torjus/oubliette/internal/auth"
|
||||
"git.t-juice.club/torjus/oubliette/internal/config"
|
||||
"git.t-juice.club/torjus/oubliette/internal/detection"
|
||||
"git.t-juice.club/torjus/oubliette/internal/metrics"
|
||||
"git.t-juice.club/torjus/oubliette/internal/notify"
|
||||
"git.t-juice.club/torjus/oubliette/internal/shell"
|
||||
"git.t-juice.club/torjus/oubliette/internal/shell/adventure"
|
||||
@@ -34,9 +35,10 @@ type Server struct {
|
||||
connSem chan struct{} // semaphore limiting concurrent connections
|
||||
shellRegistry *shell.Registry
|
||||
notifier notify.Sender
|
||||
metrics *metrics.Metrics
|
||||
}
|
||||
|
||||
func New(cfg config.Config, store storage.Store, logger *slog.Logger) (*Server, error) {
|
||||
func New(cfg config.Config, store storage.Store, logger *slog.Logger, m *metrics.Metrics) (*Server, error) {
|
||||
registry := shell.NewRegistry()
|
||||
if err := registry.Register(bash.NewBashShell(), 1); err != nil {
|
||||
return nil, fmt.Errorf("registering bash shell: %w", err)
|
||||
@@ -59,6 +61,7 @@ func New(cfg config.Config, store storage.Store, logger *slog.Logger) (*Server,
|
||||
connSem: make(chan struct{}, cfg.SSH.MaxConnections),
|
||||
shellRegistry: registry,
|
||||
notifier: notify.NewSender(cfg.Notify.Webhooks, logger),
|
||||
metrics: m,
|
||||
}
|
||||
|
||||
hostKey, err := loadOrGenerateHostKey(cfg.SSH.HostKeyPath)
|
||||
@@ -102,11 +105,16 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
|
||||
// Enforce max concurrent connections.
|
||||
select {
|
||||
case s.connSem <- struct{}{}:
|
||||
s.metrics.SSHConnectionsActive.Inc()
|
||||
go func() {
|
||||
defer func() { <-s.connSem }()
|
||||
defer func() {
|
||||
<-s.connSem
|
||||
s.metrics.SSHConnectionsActive.Dec()
|
||||
}()
|
||||
s.handleConn(conn)
|
||||
}()
|
||||
default:
|
||||
s.metrics.SSHConnectionsTotal.WithLabelValues("rejected_max_connections").Inc()
|
||||
s.logger.Warn("max connections reached, rejecting", "remote_addr", conn.RemoteAddr())
|
||||
conn.Close()
|
||||
}
|
||||
@@ -118,11 +126,13 @@ func (s *Server) handleConn(conn net.Conn) {
|
||||
|
||||
sshConn, chans, reqs, err := ssh.NewServerConn(conn, s.sshConfig)
|
||||
if err != nil {
|
||||
s.metrics.SSHConnectionsTotal.WithLabelValues("rejected_handshake").Inc()
|
||||
s.logger.Debug("SSH handshake failed", "remote_addr", conn.RemoteAddr(), "err", err)
|
||||
return
|
||||
}
|
||||
defer sshConn.Close()
|
||||
|
||||
s.metrics.SSHConnectionsTotal.WithLabelValues("accepted").Inc()
|
||||
s.logger.Info("SSH connection established",
|
||||
"remote_addr", sshConn.RemoteAddr(),
|
||||
"user", sshConn.User(),
|
||||
@@ -171,11 +181,16 @@ func (s *Server) handleSession(channel ssh.Channel, requests <-chan *ssh.Request
|
||||
}
|
||||
|
||||
ip := extractIP(conn.RemoteAddr())
|
||||
sessionStart := time.Now()
|
||||
sessionID, err := s.store.CreateSession(context.Background(), ip, conn.User(), selectedShell.Name())
|
||||
if err != nil {
|
||||
s.logger.Error("failed to create session", "err", err)
|
||||
} else {
|
||||
s.metrics.SessionsTotal.WithLabelValues(selectedShell.Name()).Inc()
|
||||
s.metrics.SessionsActive.Inc()
|
||||
defer func() {
|
||||
s.metrics.SessionsActive.Dec()
|
||||
s.metrics.SessionDuration.Observe(time.Since(sessionStart).Seconds())
|
||||
if err := s.store.EndSession(context.Background(), sessionID, time.Now()); err != nil {
|
||||
s.logger.Error("failed to end session", "err", err)
|
||||
}
|
||||
@@ -318,6 +333,12 @@ func (s *Server) passwordCallback(conn ssh.ConnMetadata, password []byte) (*ssh.
|
||||
ip := extractIP(conn.RemoteAddr())
|
||||
d := s.authenticator.Authenticate(ip, conn.User(), string(password))
|
||||
|
||||
if d.Accepted {
|
||||
s.metrics.AuthAttemptsTotal.WithLabelValues("accepted", d.Reason).Inc()
|
||||
} else {
|
||||
s.metrics.AuthAttemptsTotal.WithLabelValues("rejected", d.Reason).Inc()
|
||||
}
|
||||
|
||||
s.logger.Info("auth attempt",
|
||||
"remote_addr", conn.RemoteAddr(),
|
||||
"username", conn.User(),
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"time"
|
||||
|
||||
"git.t-juice.club/torjus/oubliette/internal/config"
|
||||
"git.t-juice.club/torjus/oubliette/internal/metrics"
|
||||
"git.t-juice.club/torjus/oubliette/internal/storage"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
@@ -120,7 +121,7 @@ func TestIntegrationSSHConnect(t *testing.T) {
|
||||
|
||||
logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
|
||||
store := storage.NewMemoryStore()
|
||||
srv, err := New(cfg, store, logger)
|
||||
srv, err := New(cfg, store, logger, metrics.New("test"))
|
||||
if err != nil {
|
||||
t.Fatalf("creating server: %v", err)
|
||||
}
|
||||
|
||||
@@ -20,7 +20,8 @@ type Server struct {
|
||||
}
|
||||
|
||||
// NewServer creates a new web Server with routes registered.
|
||||
func NewServer(store storage.Store, logger *slog.Logger) (*Server, error) {
|
||||
// If metricsHandler is non-nil, a /metrics endpoint is registered.
|
||||
func NewServer(store storage.Store, logger *slog.Logger, metricsHandler http.Handler) (*Server, error) {
|
||||
tmpl, err := loadTemplates()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -40,6 +41,10 @@ func NewServer(store storage.Store, logger *slog.Logger) (*Server, error) {
|
||||
s.mux.HandleFunc("GET /fragments/stats", s.handleFragmentStats)
|
||||
s.mux.HandleFunc("GET /fragments/active-sessions", s.handleFragmentActiveSessions)
|
||||
|
||||
if metricsHandler != nil {
|
||||
s.mux.Handle("GET /metrics", metricsHandler)
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.t-juice.club/torjus/oubliette/internal/metrics"
|
||||
"git.t-juice.club/torjus/oubliette/internal/storage"
|
||||
)
|
||||
|
||||
@@ -17,7 +18,7 @@ func newTestServer(t *testing.T) *Server {
|
||||
t.Helper()
|
||||
store := storage.NewMemoryStore()
|
||||
logger := slog.Default()
|
||||
srv, err := NewServer(store, logger)
|
||||
srv, err := NewServer(store, logger, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("creating server: %v", err)
|
||||
}
|
||||
@@ -46,7 +47,7 @@ func newSeededTestServer(t *testing.T) *Server {
|
||||
}
|
||||
|
||||
logger := slog.Default()
|
||||
srv, err := NewServer(store, logger)
|
||||
srv, err := NewServer(store, logger, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("creating server: %v", err)
|
||||
}
|
||||
@@ -154,7 +155,7 @@ func TestSessionDetailHandler(t *testing.T) {
|
||||
t.Fatalf("CreateSession: %v", err)
|
||||
}
|
||||
|
||||
srv, err := NewServer(store, slog.Default())
|
||||
srv, err := NewServer(store, slog.Default(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewServer: %v", err)
|
||||
}
|
||||
@@ -194,7 +195,7 @@ func TestAPISessionEvents(t *testing.T) {
|
||||
t.Fatalf("AppendSessionEvents: %v", err)
|
||||
}
|
||||
|
||||
srv, err := NewServer(store, slog.Default())
|
||||
srv, err := NewServer(store, slog.Default(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewServer: %v", err)
|
||||
}
|
||||
@@ -236,6 +237,47 @@ func TestAPISessionEvents(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsEndpoint(t *testing.T) {
|
||||
t.Run("enabled", func(t *testing.T) {
|
||||
m := metrics.New("test")
|
||||
store := storage.NewMemoryStore()
|
||||
srv, err := NewServer(store, slog.Default(), m.Handler())
|
||||
if err != nil {
|
||||
t.Fatalf("NewServer: %v", err)
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||
w := httptest.NewRecorder()
|
||||
srv.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200", w.Code)
|
||||
}
|
||||
body := w.Body.String()
|
||||
if !strings.Contains(body, `oubliette_build_info{version="test"} 1`) {
|
||||
t.Errorf("response should contain build_info metric, got:\n%s", body)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("disabled", func(t *testing.T) {
|
||||
store := storage.NewMemoryStore()
|
||||
srv, err := NewServer(store, slog.Default(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewServer: %v", err)
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||
w := httptest.NewRecorder()
|
||||
srv.ServeHTTP(w, req)
|
||||
|
||||
// Without a metrics handler, /metrics falls through to the dashboard.
|
||||
body := w.Body.String()
|
||||
if strings.Contains(body, "oubliette_build_info") {
|
||||
t.Error("response should not contain prometheus metrics when disabled")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestStaticAssets(t *testing.T) {
|
||||
srv := newTestServer(t)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user