feat: add new Prometheus metrics and bearer token auth for /metrics
Add 6 new Prometheus metrics for richer observability:

- auth_attempts_by_country_total (counter by country)
- commands_executed_total (counter by shell via OnCommand callback)
- human_score (histogram of final detection scores)
- storage_login_attempts_total, storage_unique_ips, storage_sessions_total (gauges via custom collector querying GetDashboardStats on each scrape)

Add optional bearer token authentication for the /metrics endpoint via the web.metrics_token config option. Uses crypto/subtle.ConstantTimeCompare. An empty token (the default) means no auth, for backwards compatibility.

Also adds "cisco" to the pre-initialized session/command metric labels.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"git.t-juice.club/torjus/oubliette/internal/storage"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
@@ -12,13 +14,16 @@ import (
|
||||
type Metrics struct {
|
||||
registry *prometheus.Registry
|
||||
|
||||
SSHConnectionsTotal *prometheus.CounterVec
|
||||
SSHConnectionsActive prometheus.Gauge
|
||||
AuthAttemptsTotal *prometheus.CounterVec
|
||||
SessionsTotal *prometheus.CounterVec
|
||||
SessionsActive prometheus.Gauge
|
||||
SessionDuration prometheus.Histogram
|
||||
BuildInfo *prometheus.GaugeVec
|
||||
SSHConnectionsTotal *prometheus.CounterVec
|
||||
SSHConnectionsActive prometheus.Gauge
|
||||
AuthAttemptsTotal *prometheus.CounterVec
|
||||
AuthAttemptsByCountry *prometheus.CounterVec
|
||||
CommandsExecuted *prometheus.CounterVec
|
||||
HumanScore prometheus.Histogram
|
||||
SessionsTotal *prometheus.CounterVec
|
||||
SessionsActive prometheus.Gauge
|
||||
SessionDuration prometheus.Histogram
|
||||
BuildInfo *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// New creates a new Metrics instance with all collectors registered.
|
||||
@@ -39,6 +44,19 @@ func New(version string) *Metrics {
|
||||
Name: "oubliette_auth_attempts_total",
|
||||
Help: "Total authentication attempts.",
|
||||
}, []string{"result", "reason"}),
|
||||
AuthAttemptsByCountry: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "oubliette_auth_attempts_by_country_total",
|
||||
Help: "Total authentication attempts by country.",
|
||||
}, []string{"country"}),
|
||||
CommandsExecuted: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "oubliette_commands_executed_total",
|
||||
Help: "Total commands executed in shells.",
|
||||
}, []string{"shell"}),
|
||||
HumanScore: prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Name: "oubliette_human_score",
|
||||
Help: "Distribution of final human detection scores.",
|
||||
Buckets: prometheus.LinearBuckets(0, 0.1, 11), // 0.0, 0.1, ..., 1.0
|
||||
}),
|
||||
SessionsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "oubliette_sessions_total",
|
||||
Help: "Total sessions created.",
|
||||
@@ -64,6 +82,9 @@ func New(version string) *Metrics {
|
||||
m.SSHConnectionsTotal,
|
||||
m.SSHConnectionsActive,
|
||||
m.AuthAttemptsTotal,
|
||||
m.AuthAttemptsByCountry,
|
||||
m.CommandsExecuted,
|
||||
m.HumanScore,
|
||||
m.SessionsTotal,
|
||||
m.SessionsActive,
|
||||
m.SessionDuration,
|
||||
@@ -80,14 +101,59 @@ func New(version string) *Metrics {
|
||||
m.AuthAttemptsTotal.WithLabelValues("accepted", reason)
|
||||
m.AuthAttemptsTotal.WithLabelValues("rejected", reason)
|
||||
}
|
||||
for _, shell := range []string{"bash", "fridge", "banking", "adventure"} {
|
||||
m.SessionsTotal.WithLabelValues(shell)
|
||||
for _, sh := range []string{"bash", "fridge", "banking", "adventure", "cisco"} {
|
||||
m.SessionsTotal.WithLabelValues(sh)
|
||||
m.CommandsExecuted.WithLabelValues(sh)
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
// RegisterStoreCollector registers a collector that queries storage stats on each scrape.
|
||||
func (m *Metrics) RegisterStoreCollector(store storage.Store) {
|
||||
m.registry.MustRegister(&storeCollector{store: store})
|
||||
}
|
||||
|
||||
// Handler returns an http.Handler that serves Prometheus metrics.
|
||||
func (m *Metrics) Handler() http.Handler {
|
||||
return promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})
|
||||
}
|
||||
|
||||
// storeCollector implements prometheus.Collector, querying storage on each scrape.
|
||||
type storeCollector struct {
|
||||
store storage.Store
|
||||
}
|
||||
|
||||
var (
|
||||
storageLoginAttemptsDesc = prometheus.NewDesc(
|
||||
"oubliette_storage_login_attempts_total",
|
||||
"Total login attempts in storage.",
|
||||
nil, nil,
|
||||
)
|
||||
storageUniqueIPsDesc = prometheus.NewDesc(
|
||||
"oubliette_storage_unique_ips",
|
||||
"Unique IPs in storage.",
|
||||
nil, nil,
|
||||
)
|
||||
storageSessionsDesc = prometheus.NewDesc(
|
||||
"oubliette_storage_sessions_total",
|
||||
"Total sessions in storage.",
|
||||
nil, nil,
|
||||
)
|
||||
)
|
||||
|
||||
func (c *storeCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- storageLoginAttemptsDesc
|
||||
ch <- storageUniqueIPsDesc
|
||||
ch <- storageSessionsDesc
|
||||
}
|
||||
|
||||
func (c *storeCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
stats, err := c.store.GetDashboardStats(context.Background())
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(storageLoginAttemptsDesc, prometheus.GaugeValue, float64(stats.TotalAttempts))
|
||||
ch <- prometheus.MustNewConstMetric(storageUniqueIPsDesc, prometheus.GaugeValue, float64(stats.UniqueIPs))
|
||||
ch <- prometheus.MustNewConstMetric(storageSessionsDesc, prometheus.GaugeValue, float64(stats.TotalSessions))
|
||||
}
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.t-juice.club/torjus/oubliette/internal/storage"
|
||||
)
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
@@ -21,10 +24,12 @@ func TestNew(t *testing.T) {
|
||||
"oubliette_ssh_connections_total": false,
|
||||
"oubliette_ssh_connections_active": false,
|
||||
"oubliette_auth_attempts_total": false,
|
||||
"oubliette_sessions_total": false,
|
||||
"oubliette_sessions_active": false,
|
||||
"oubliette_commands_executed_total": false,
|
||||
"oubliette_human_score": false,
|
||||
"oubliette_sessions_total": false,
|
||||
"oubliette_sessions_active": false,
|
||||
"oubliette_session_duration_seconds": false,
|
||||
"oubliette_build_info": false,
|
||||
"oubliette_build_info": false,
|
||||
}
|
||||
|
||||
for _, f := range families {
|
||||
@@ -40,6 +45,31 @@ func TestNew(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAuthAttemptsByCountry(t *testing.T) {
|
||||
m := New("1.0.0")
|
||||
m.AuthAttemptsByCountry.WithLabelValues("US").Inc()
|
||||
m.AuthAttemptsByCountry.WithLabelValues("DE").Inc()
|
||||
m.AuthAttemptsByCountry.WithLabelValues("US").Inc()
|
||||
|
||||
families, err := m.registry.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("gather: %v", err)
|
||||
}
|
||||
|
||||
var found bool
|
||||
for _, f := range families {
|
||||
if f.GetName() == "oubliette_auth_attempts_by_country_total" {
|
||||
found = true
|
||||
if len(f.GetMetric()) != 2 {
|
||||
t.Errorf("expected 2 label pairs (US, DE), got %d", len(f.GetMetric()))
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Error("oubliette_auth_attempts_by_country_total not found after incrementing")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandler(t *testing.T) {
|
||||
m := New("1.2.3")
|
||||
|
||||
@@ -60,3 +90,53 @@ func TestHandler(t *testing.T) {
|
||||
t.Errorf("response should contain build_info metric, got:\n%s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreCollector(t *testing.T) {
|
||||
store := storage.NewMemoryStore()
|
||||
ctx := context.Background()
|
||||
|
||||
// Seed some data.
|
||||
if err := store.RecordLoginAttempt(ctx, "root", "toor", "10.0.0.1", ""); err != nil {
|
||||
t.Fatalf("RecordLoginAttempt: %v", err)
|
||||
}
|
||||
if err := store.RecordLoginAttempt(ctx, "admin", "admin", "10.0.0.2", ""); err != nil {
|
||||
t.Fatalf("RecordLoginAttempt: %v", err)
|
||||
}
|
||||
if _, err := store.CreateSession(ctx, "10.0.0.1", "root", "bash", ""); err != nil {
|
||||
t.Fatalf("CreateSession: %v", err)
|
||||
}
|
||||
|
||||
m := New("test")
|
||||
m.RegisterStoreCollector(store)
|
||||
|
||||
families, err := m.registry.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("gather: %v", err)
|
||||
}
|
||||
|
||||
wantMetrics := map[string]float64{
|
||||
"oubliette_storage_login_attempts_total": 2,
|
||||
"oubliette_storage_unique_ips": 2,
|
||||
"oubliette_storage_sessions_total": 1,
|
||||
}
|
||||
|
||||
for _, f := range families {
|
||||
expected, ok := wantMetrics[f.GetName()]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if len(f.GetMetric()) == 0 {
|
||||
t.Errorf("metric %q has no samples", f.GetName())
|
||||
continue
|
||||
}
|
||||
got := f.GetMetric()[0].GetGauge().GetValue()
|
||||
if got != expected {
|
||||
t.Errorf("metric %q = %f, want %f", f.GetName(), got, expected)
|
||||
}
|
||||
delete(wantMetrics, f.GetName())
|
||||
}
|
||||
|
||||
for name := range wantMetrics {
|
||||
t.Errorf("metric %q not found in gather output", name)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user