feat: add Prometheus metrics endpoint and Docker image (PLAN.md 4.2)

Add internal/metrics package with dedicated Prometheus registry exposing
SSH connection, auth attempt, session, and build info metrics. Wire into
SSH server (4 instrumentation points) and web server (/metrics endpoint).
Add dockerImage output to flake.nix via dockerTools.buildLayeredImage.
Bump version to 0.7.0.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 05:47:16 +01:00
parent b8fcbc7e10
commit ab07e6a8dc
14 changed files with 342 additions and 19 deletions

View File

@@ -21,8 +21,9 @@ type Config struct {
}
type WebConfig struct {
Enabled bool `toml:"enabled"`
ListenAddr string `toml:"listen_addr"`
Enabled bool `toml:"enabled"`
ListenAddr string `toml:"listen_addr"`
MetricsEnabled *bool `toml:"metrics_enabled"`
}
type ShellConfig struct {
@@ -143,6 +144,10 @@ func applyDefaults(cfg *Config) {
if cfg.Web.ListenAddr == "" {
cfg.Web.ListenAddr = ":8080"
}
if cfg.Web.MetricsEnabled == nil {
t := true
cfg.Web.MetricsEnabled = &t
}
if cfg.Shell.Hostname == "" {
cfg.Shell.Hostname = "ubuntu-server"
}

View File

@@ -0,0 +1,93 @@
package metrics
import (
"net/http"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Metrics holds all Prometheus collectors for the honeypot.
type Metrics struct {
registry *prometheus.Registry
SSHConnectionsTotal *prometheus.CounterVec
SSHConnectionsActive prometheus.Gauge
AuthAttemptsTotal *prometheus.CounterVec
SessionsTotal *prometheus.CounterVec
SessionsActive prometheus.Gauge
SessionDuration prometheus.Histogram
BuildInfo *prometheus.GaugeVec
}
// New creates a new Metrics instance with all collectors registered.
func New(version string) *Metrics {
reg := prometheus.NewRegistry()
m := &Metrics{
registry: reg,
SSHConnectionsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "oubliette_ssh_connections_total",
Help: "Total SSH connections received.",
}, []string{"outcome"}),
SSHConnectionsActive: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "oubliette_ssh_connections_active",
Help: "Current active SSH connections.",
}),
AuthAttemptsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "oubliette_auth_attempts_total",
Help: "Total authentication attempts.",
}, []string{"result", "reason"}),
SessionsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "oubliette_sessions_total",
Help: "Total sessions created.",
}, []string{"shell"}),
SessionsActive: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "oubliette_sessions_active",
Help: "Current active sessions.",
}),
SessionDuration: prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "oubliette_session_duration_seconds",
Help: "Session duration in seconds.",
Buckets: []float64{1, 5, 10, 30, 60, 120, 300, 600, 1800, 3600},
}),
BuildInfo: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "oubliette_build_info",
Help: "Build information. Always 1.",
}, []string{"version"}),
}
reg.MustRegister(
collectors.NewGoCollector(),
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
m.SSHConnectionsTotal,
m.SSHConnectionsActive,
m.AuthAttemptsTotal,
m.SessionsTotal,
m.SessionsActive,
m.SessionDuration,
m.BuildInfo,
)
m.BuildInfo.WithLabelValues(version).Set(1)
// Initialize label combinations so they appear in Gather/output.
for _, outcome := range []string{"accepted", "rejected_handshake", "rejected_max_connections"} {
m.SSHConnectionsTotal.WithLabelValues(outcome)
}
for _, reason := range []string{"static_credential", "remembered_credential", "threshold_reached", "rejected"} {
m.AuthAttemptsTotal.WithLabelValues("accepted", reason)
m.AuthAttemptsTotal.WithLabelValues("rejected", reason)
}
for _, shell := range []string{"bash", "fridge", "banking", "adventure"} {
m.SessionsTotal.WithLabelValues(shell)
}
return m
}
// Handler returns an http.Handler that serves Prometheus metrics.
func (m *Metrics) Handler() http.Handler {
return promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})
}

View File

@@ -0,0 +1,62 @@
package metrics
import (
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
func TestNew(t *testing.T) {
m := New("1.2.3")
// Gather all metrics and check expected names exist.
families, err := m.registry.Gather()
if err != nil {
t.Fatalf("gather: %v", err)
}
want := map[string]bool{
"oubliette_ssh_connections_total": false,
"oubliette_ssh_connections_active": false,
"oubliette_auth_attempts_total": false,
"oubliette_sessions_total": false,
"oubliette_sessions_active": false,
"oubliette_session_duration_seconds": false,
"oubliette_build_info": false,
}
for _, f := range families {
if _, ok := want[f.GetName()]; ok {
want[f.GetName()] = true
}
}
for name, found := range want {
if !found {
t.Errorf("metric %q not registered", name)
}
}
}
func TestHandler(t *testing.T) {
m := New("1.2.3")
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
w := httptest.NewRecorder()
m.Handler().ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("status = %d, want 200", w.Code)
}
body, err := io.ReadAll(w.Body)
if err != nil {
t.Fatalf("reading body: %v", err)
}
if !strings.Contains(string(body), `oubliette_build_info{version="1.2.3"} 1`) {
t.Errorf("response should contain build_info metric, got:\n%s", body)
}
}

View File

@@ -15,6 +15,7 @@ import (
"git.t-juice.club/torjus/oubliette/internal/auth"
"git.t-juice.club/torjus/oubliette/internal/config"
"git.t-juice.club/torjus/oubliette/internal/detection"
"git.t-juice.club/torjus/oubliette/internal/metrics"
"git.t-juice.club/torjus/oubliette/internal/notify"
"git.t-juice.club/torjus/oubliette/internal/shell"
"git.t-juice.club/torjus/oubliette/internal/shell/adventure"
@@ -34,9 +35,10 @@ type Server struct {
connSem chan struct{} // semaphore limiting concurrent connections
shellRegistry *shell.Registry
notifier notify.Sender
metrics *metrics.Metrics
}
func New(cfg config.Config, store storage.Store, logger *slog.Logger) (*Server, error) {
func New(cfg config.Config, store storage.Store, logger *slog.Logger, m *metrics.Metrics) (*Server, error) {
registry := shell.NewRegistry()
if err := registry.Register(bash.NewBashShell(), 1); err != nil {
return nil, fmt.Errorf("registering bash shell: %w", err)
@@ -59,6 +61,7 @@ func New(cfg config.Config, store storage.Store, logger *slog.Logger) (*Server,
connSem: make(chan struct{}, cfg.SSH.MaxConnections),
shellRegistry: registry,
notifier: notify.NewSender(cfg.Notify.Webhooks, logger),
metrics: m,
}
hostKey, err := loadOrGenerateHostKey(cfg.SSH.HostKeyPath)
@@ -102,11 +105,16 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
// Enforce max concurrent connections.
select {
case s.connSem <- struct{}{}:
s.metrics.SSHConnectionsActive.Inc()
go func() {
defer func() { <-s.connSem }()
defer func() {
<-s.connSem
s.metrics.SSHConnectionsActive.Dec()
}()
s.handleConn(conn)
}()
default:
s.metrics.SSHConnectionsTotal.WithLabelValues("rejected_max_connections").Inc()
s.logger.Warn("max connections reached, rejecting", "remote_addr", conn.RemoteAddr())
conn.Close()
}
@@ -118,11 +126,13 @@ func (s *Server) handleConn(conn net.Conn) {
sshConn, chans, reqs, err := ssh.NewServerConn(conn, s.sshConfig)
if err != nil {
s.metrics.SSHConnectionsTotal.WithLabelValues("rejected_handshake").Inc()
s.logger.Debug("SSH handshake failed", "remote_addr", conn.RemoteAddr(), "err", err)
return
}
defer sshConn.Close()
s.metrics.SSHConnectionsTotal.WithLabelValues("accepted").Inc()
s.logger.Info("SSH connection established",
"remote_addr", sshConn.RemoteAddr(),
"user", sshConn.User(),
@@ -171,11 +181,16 @@ func (s *Server) handleSession(channel ssh.Channel, requests <-chan *ssh.Request
}
ip := extractIP(conn.RemoteAddr())
sessionStart := time.Now()
sessionID, err := s.store.CreateSession(context.Background(), ip, conn.User(), selectedShell.Name())
if err != nil {
s.logger.Error("failed to create session", "err", err)
} else {
s.metrics.SessionsTotal.WithLabelValues(selectedShell.Name()).Inc()
s.metrics.SessionsActive.Inc()
defer func() {
s.metrics.SessionsActive.Dec()
s.metrics.SessionDuration.Observe(time.Since(sessionStart).Seconds())
if err := s.store.EndSession(context.Background(), sessionID, time.Now()); err != nil {
s.logger.Error("failed to end session", "err", err)
}
@@ -318,6 +333,12 @@ func (s *Server) passwordCallback(conn ssh.ConnMetadata, password []byte) (*ssh.
ip := extractIP(conn.RemoteAddr())
d := s.authenticator.Authenticate(ip, conn.User(), string(password))
if d.Accepted {
s.metrics.AuthAttemptsTotal.WithLabelValues("accepted", d.Reason).Inc()
} else {
s.metrics.AuthAttemptsTotal.WithLabelValues("rejected", d.Reason).Inc()
}
s.logger.Info("auth attempt",
"remote_addr", conn.RemoteAddr(),
"username", conn.User(),

View File

@@ -12,6 +12,7 @@ import (
"time"
"git.t-juice.club/torjus/oubliette/internal/config"
"git.t-juice.club/torjus/oubliette/internal/metrics"
"git.t-juice.club/torjus/oubliette/internal/storage"
"golang.org/x/crypto/ssh"
)
@@ -120,7 +121,7 @@ func TestIntegrationSSHConnect(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
store := storage.NewMemoryStore()
srv, err := New(cfg, store, logger)
srv, err := New(cfg, store, logger, metrics.New("test"))
if err != nil {
t.Fatalf("creating server: %v", err)
}

View File

@@ -20,7 +20,8 @@ type Server struct {
}
// NewServer creates a new web Server with routes registered.
func NewServer(store storage.Store, logger *slog.Logger) (*Server, error) {
// If metricsHandler is non-nil, a /metrics endpoint is registered.
func NewServer(store storage.Store, logger *slog.Logger, metricsHandler http.Handler) (*Server, error) {
tmpl, err := loadTemplates()
if err != nil {
return nil, err
@@ -40,6 +41,10 @@ func NewServer(store storage.Store, logger *slog.Logger) (*Server, error) {
s.mux.HandleFunc("GET /fragments/stats", s.handleFragmentStats)
s.mux.HandleFunc("GET /fragments/active-sessions", s.handleFragmentActiveSessions)
if metricsHandler != nil {
s.mux.Handle("GET /metrics", metricsHandler)
}
return s, nil
}

View File

@@ -10,6 +10,7 @@ import (
"testing"
"time"
"git.t-juice.club/torjus/oubliette/internal/metrics"
"git.t-juice.club/torjus/oubliette/internal/storage"
)
@@ -17,7 +18,7 @@ func newTestServer(t *testing.T) *Server {
t.Helper()
store := storage.NewMemoryStore()
logger := slog.Default()
srv, err := NewServer(store, logger)
srv, err := NewServer(store, logger, nil)
if err != nil {
t.Fatalf("creating server: %v", err)
}
@@ -46,7 +47,7 @@ func newSeededTestServer(t *testing.T) *Server {
}
logger := slog.Default()
srv, err := NewServer(store, logger)
srv, err := NewServer(store, logger, nil)
if err != nil {
t.Fatalf("creating server: %v", err)
}
@@ -154,7 +155,7 @@ func TestSessionDetailHandler(t *testing.T) {
t.Fatalf("CreateSession: %v", err)
}
srv, err := NewServer(store, slog.Default())
srv, err := NewServer(store, slog.Default(), nil)
if err != nil {
t.Fatalf("NewServer: %v", err)
}
@@ -194,7 +195,7 @@ func TestAPISessionEvents(t *testing.T) {
t.Fatalf("AppendSessionEvents: %v", err)
}
srv, err := NewServer(store, slog.Default())
srv, err := NewServer(store, slog.Default(), nil)
if err != nil {
t.Fatalf("NewServer: %v", err)
}
@@ -236,6 +237,47 @@ func TestAPISessionEvents(t *testing.T) {
}
}
func TestMetricsEndpoint(t *testing.T) {
t.Run("enabled", func(t *testing.T) {
m := metrics.New("test")
store := storage.NewMemoryStore()
srv, err := NewServer(store, slog.Default(), m.Handler())
if err != nil {
t.Fatalf("NewServer: %v", err)
}
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
w := httptest.NewRecorder()
srv.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("status = %d, want 200", w.Code)
}
body := w.Body.String()
if !strings.Contains(body, `oubliette_build_info{version="test"} 1`) {
t.Errorf("response should contain build_info metric, got:\n%s", body)
}
})
t.Run("disabled", func(t *testing.T) {
store := storage.NewMemoryStore()
srv, err := NewServer(store, slog.Default(), nil)
if err != nil {
t.Fatalf("NewServer: %v", err)
}
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
w := httptest.NewRecorder()
srv.ServeHTTP(w, req)
// Without a metrics handler, /metrics falls through to the dashboard.
body := w.Body.String()
if strings.Contains(body, "oubliette_build_info") {
t.Error("response should not contain prometheus metrics when disabled")
}
})
}
func TestStaticAssets(t *testing.T) {
srv := newTestServer(t)