feat: add new Prometheus metrics and bearer token auth for /metrics

Add 6 new Prometheus metrics for richer observability:
- auth_attempts_by_country_total (counter by country)
- commands_executed_total (counter by shell via OnCommand callback)
- human_score (histogram of final detection scores)
- storage_login_attempts_total, storage_unique_ips, storage_sessions_total
  (gauges via custom collector querying GetDashboardStats on each scrape)

Add optional bearer token authentication for the /metrics endpoint via
web.metrics_token config option. Uses crypto/subtle.ConstantTimeCompare.
An empty token (the default) disables authentication, preserving backwards compatibility.

Also add "cisco" to the pre-initialized session/command metric labels.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 15:54:29 +01:00
parent 9aecc7ce02
commit df860b3061
16 changed files with 301 additions and 23 deletions

View File

@@ -1,8 +1,10 @@
package metrics
import (
"context"
"net/http"
"git.t-juice.club/torjus/oubliette/internal/storage"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -12,13 +14,16 @@ import (
type Metrics struct {
registry *prometheus.Registry
SSHConnectionsTotal *prometheus.CounterVec
SSHConnectionsActive prometheus.Gauge
AuthAttemptsTotal *prometheus.CounterVec
SessionsTotal *prometheus.CounterVec
SessionsActive prometheus.Gauge
SessionDuration prometheus.Histogram
BuildInfo *prometheus.GaugeVec
SSHConnectionsTotal *prometheus.CounterVec
SSHConnectionsActive prometheus.Gauge
AuthAttemptsTotal *prometheus.CounterVec
AuthAttemptsByCountry *prometheus.CounterVec
CommandsExecuted *prometheus.CounterVec
HumanScore prometheus.Histogram
SessionsTotal *prometheus.CounterVec
SessionsActive prometheus.Gauge
SessionDuration prometheus.Histogram
BuildInfo *prometheus.GaugeVec
}
// New creates a new Metrics instance with all collectors registered.
@@ -39,6 +44,19 @@ func New(version string) *Metrics {
Name: "oubliette_auth_attempts_total",
Help: "Total authentication attempts.",
}, []string{"result", "reason"}),
AuthAttemptsByCountry: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "oubliette_auth_attempts_by_country_total",
Help: "Total authentication attempts by country.",
}, []string{"country"}),
CommandsExecuted: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "oubliette_commands_executed_total",
Help: "Total commands executed in shells.",
}, []string{"shell"}),
HumanScore: prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "oubliette_human_score",
Help: "Distribution of final human detection scores.",
Buckets: prometheus.LinearBuckets(0, 0.1, 11), // 0.0, 0.1, ..., 1.0
}),
SessionsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "oubliette_sessions_total",
Help: "Total sessions created.",
@@ -64,6 +82,9 @@ func New(version string) *Metrics {
m.SSHConnectionsTotal,
m.SSHConnectionsActive,
m.AuthAttemptsTotal,
m.AuthAttemptsByCountry,
m.CommandsExecuted,
m.HumanScore,
m.SessionsTotal,
m.SessionsActive,
m.SessionDuration,
@@ -80,14 +101,59 @@ func New(version string) *Metrics {
m.AuthAttemptsTotal.WithLabelValues("accepted", reason)
m.AuthAttemptsTotal.WithLabelValues("rejected", reason)
}
for _, shell := range []string{"bash", "fridge", "banking", "adventure"} {
m.SessionsTotal.WithLabelValues(shell)
for _, sh := range []string{"bash", "fridge", "banking", "adventure", "cisco"} {
m.SessionsTotal.WithLabelValues(sh)
m.CommandsExecuted.WithLabelValues(sh)
}
return m
}
// RegisterStoreCollector wraps store in a storeCollector and registers it on
// the Metrics registry, so that storage statistics are fetched from store
// whenever the registry is gathered.
func (m *Metrics) RegisterStoreCollector(store storage.Store) {
	collector := &storeCollector{store: store}
	m.registry.MustRegister(collector)
}
// Handler builds an http.Handler that exposes everything registered on the
// Metrics registry, using the zero-value promhttp handler options.
func (m *Metrics) Handler() http.Handler {
	opts := promhttp.HandlerOpts{}
	return promhttp.HandlerFor(m.registry, opts)
}
// storeCollector implements prometheus.Collector, querying storage on each scrape.
// It keeps no state of its own: Collect calls GetDashboardStats on the store
// every time it runs and turns the result into gauge samples.
type storeCollector struct {
// store is the storage backend whose dashboard stats are exported.
store storage.Store
}
// Descriptors for the metrics emitted by storeCollector. All of them are
// declared without variable labels and without constant labels.
var (
	// storageLoginAttemptsDesc describes the login-attempt count read from storage.
	storageLoginAttemptsDesc = prometheus.NewDesc("oubliette_storage_login_attempts_total", "Total login attempts in storage.", nil, nil)

	// storageUniqueIPsDesc describes the unique-IP count read from storage.
	storageUniqueIPsDesc = prometheus.NewDesc("oubliette_storage_unique_ips", "Unique IPs in storage.", nil, nil)

	// storageSessionsDesc describes the session count read from storage.
	storageSessionsDesc = prometheus.NewDesc("oubliette_storage_sessions_total", "Total sessions in storage.", nil, nil)
)
// Describe sends the descriptors of the three storage metrics to ch, in the
// order login attempts, unique IPs, sessions.
func (c *storeCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		storageLoginAttemptsDesc,
		storageUniqueIPsDesc,
		storageSessionsDesc,
	}
	for _, desc := range descs {
		ch <- desc
	}
}
// Collect fetches the dashboard stats from the store and sends one gauge
// sample per storage metric to ch.
//
// NOTE(review): when GetDashboardStats fails the error is dropped and nothing
// is sent, so the three storage series are simply absent from that gather.
// The query also runs with context.Background(), i.e. without a deadline.
func (c *storeCollector) Collect(ch chan<- prometheus.Metric) {
	stats, err := c.store.GetDashboardStats(context.Background())
	if err != nil {
		return
	}

	// emit sends a single gauge-typed constant metric for desc.
	emit := func(desc *prometheus.Desc, value float64) {
		ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, value)
	}

	emit(storageLoginAttemptsDesc, float64(stats.TotalAttempts))
	emit(storageUniqueIPsDesc, float64(stats.UniqueIPs))
	emit(storageSessionsDesc, float64(stats.TotalSessions))
}

View File

@@ -1,11 +1,14 @@
package metrics
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"git.t-juice.club/torjus/oubliette/internal/storage"
)
func TestNew(t *testing.T) {
@@ -21,10 +24,12 @@ func TestNew(t *testing.T) {
"oubliette_ssh_connections_total": false,
"oubliette_ssh_connections_active": false,
"oubliette_auth_attempts_total": false,
"oubliette_sessions_total": false,
"oubliette_sessions_active": false,
"oubliette_commands_executed_total": false,
"oubliette_human_score": false,
"oubliette_sessions_total": false,
"oubliette_sessions_active": false,
"oubliette_session_duration_seconds": false,
"oubliette_build_info": false,
"oubliette_build_info": false,
}
for _, f := range families {
@@ -40,6 +45,31 @@ func TestNew(t *testing.T) {
}
}
// TestAuthAttemptsByCountry increments the per-country counter for US twice
// and DE once, then checks that the gathered family exists and holds exactly
// two label combinations.
func TestAuthAttemptsByCountry(t *testing.T) {
	m := New("1.0.0")

	for _, country := range []string{"US", "DE", "US"} {
		m.AuthAttemptsByCountry.WithLabelValues(country).Inc()
	}

	families, err := m.registry.Gather()
	if err != nil {
		t.Fatalf("gather: %v", err)
	}

	seen := false
	for _, fam := range families {
		if fam.GetName() != "oubliette_auth_attempts_by_country_total" {
			continue
		}
		seen = true
		if n := len(fam.GetMetric()); n != 2 {
			t.Errorf("expected 2 label pairs (US, DE), got %d", n)
		}
	}
	if !seen {
		t.Error("oubliette_auth_attempts_by_country_total not found after incrementing")
	}
}
func TestHandler(t *testing.T) {
m := New("1.2.3")
@@ -60,3 +90,53 @@ func TestHandler(t *testing.T) {
t.Errorf("response should contain build_info metric, got:\n%s", body)
}
}
// TestStoreCollector seeds a memory store, registers the store collector and
// verifies that the gathered storage gauges report the seeded counts.
func TestStoreCollector(t *testing.T) {
	ctx := context.Background()
	store := storage.NewMemoryStore()

	// Seed two login attempts from distinct IPs and a single session.
	if err := store.RecordLoginAttempt(ctx, "root", "toor", "10.0.0.1", ""); err != nil {
		t.Fatalf("RecordLoginAttempt: %v", err)
	}
	if err := store.RecordLoginAttempt(ctx, "admin", "admin", "10.0.0.2", ""); err != nil {
		t.Fatalf("RecordLoginAttempt: %v", err)
	}
	if _, err := store.CreateSession(ctx, "10.0.0.1", "root", "bash", ""); err != nil {
		t.Fatalf("CreateSession: %v", err)
	}

	m := New("test")
	m.RegisterStoreCollector(store)

	families, err := m.registry.Gather()
	if err != nil {
		t.Fatalf("gather: %v", err)
	}

	// pending holds the expectations not yet matched against a gathered
	// family; entries are removed once their value has been checked.
	pending := map[string]float64{
		"oubliette_storage_login_attempts_total": 2,
		"oubliette_storage_unique_ips":           2,
		"oubliette_storage_sessions_total":       1,
	}

	for _, fam := range families {
		name := fam.GetName()
		want, tracked := pending[name]
		if !tracked {
			continue
		}
		samples := fam.GetMetric()
		if len(samples) == 0 {
			t.Errorf("metric %q has no samples", name)
			continue
		}
		if got := samples[0].GetGauge().GetValue(); got != want {
			t.Errorf("metric %q = %f, want %f", name, got, want)
		}
		delete(pending, name)
	}

	// Anything still pending never appeared in the gather output.
	for name := range pending {
		t.Errorf("metric %q not found in gather output", name)
	}
}