feat: add new Prometheus metrics and bearer token auth for /metrics

Add 6 new Prometheus metrics for richer observability:
- auth_attempts_by_country_total (counter by country)
- commands_executed_total (counter by shell via OnCommand callback)
- human_score (histogram of final detection scores)
- storage_login_attempts_total, storage_unique_ips, storage_sessions_total
  (gauges via custom collector querying GetDashboardStats on each scrape)

Add optional bearer token authentication for the /metrics endpoint via
web.metrics_token config option. Uses crypto/subtle.ConstantTimeCompare.
Empty token (default) means no auth for backwards compatibility.

Also adds "cisco" to pre-initialized session/command metric labels.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 15:54:29 +01:00
parent 9aecc7ce02
commit df860b3061
16 changed files with 301 additions and 23 deletions

View File

@@ -1,8 +1,10 @@
package metrics
import (
"context"
"net/http"
"git.t-juice.club/torjus/oubliette/internal/storage"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -12,13 +14,16 @@ import (
type Metrics struct {
registry *prometheus.Registry
SSHConnectionsTotal *prometheus.CounterVec
SSHConnectionsActive prometheus.Gauge
AuthAttemptsTotal *prometheus.CounterVec
SessionsTotal *prometheus.CounterVec
SessionsActive prometheus.Gauge
SessionDuration prometheus.Histogram
BuildInfo *prometheus.GaugeVec
SSHConnectionsTotal *prometheus.CounterVec
SSHConnectionsActive prometheus.Gauge
AuthAttemptsTotal *prometheus.CounterVec
AuthAttemptsByCountry *prometheus.CounterVec
CommandsExecuted *prometheus.CounterVec
HumanScore prometheus.Histogram
SessionsTotal *prometheus.CounterVec
SessionsActive prometheus.Gauge
SessionDuration prometheus.Histogram
BuildInfo *prometheus.GaugeVec
}
// New creates a new Metrics instance with all collectors registered.
@@ -39,6 +44,19 @@ func New(version string) *Metrics {
Name: "oubliette_auth_attempts_total",
Help: "Total authentication attempts.",
}, []string{"result", "reason"}),
AuthAttemptsByCountry: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "oubliette_auth_attempts_by_country_total",
Help: "Total authentication attempts by country.",
}, []string{"country"}),
CommandsExecuted: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "oubliette_commands_executed_total",
Help: "Total commands executed in shells.",
}, []string{"shell"}),
HumanScore: prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "oubliette_human_score",
Help: "Distribution of final human detection scores.",
Buckets: prometheus.LinearBuckets(0, 0.1, 11), // 0.0, 0.1, ..., 1.0
}),
SessionsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "oubliette_sessions_total",
Help: "Total sessions created.",
@@ -64,6 +82,9 @@ func New(version string) *Metrics {
m.SSHConnectionsTotal,
m.SSHConnectionsActive,
m.AuthAttemptsTotal,
m.AuthAttemptsByCountry,
m.CommandsExecuted,
m.HumanScore,
m.SessionsTotal,
m.SessionsActive,
m.SessionDuration,
@@ -80,14 +101,59 @@ func New(version string) *Metrics {
m.AuthAttemptsTotal.WithLabelValues("accepted", reason)
m.AuthAttemptsTotal.WithLabelValues("rejected", reason)
}
for _, shell := range []string{"bash", "fridge", "banking", "adventure"} {
m.SessionsTotal.WithLabelValues(shell)
for _, sh := range []string{"bash", "fridge", "banking", "adventure", "cisco"} {
m.SessionsTotal.WithLabelValues(sh)
m.CommandsExecuted.WithLabelValues(sh)
}
return m
}
// RegisterStoreCollector registers a collector that queries storage stats on each scrape.
func (m *Metrics) RegisterStoreCollector(store storage.Store) {
m.registry.MustRegister(&storeCollector{store: store})
}
// Handler returns an http.Handler that serves Prometheus metrics.
func (m *Metrics) Handler() http.Handler {
return promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})
}
// storeCollector implements prometheus.Collector, querying storage on each scrape.
type storeCollector struct {
store storage.Store
}
var (
storageLoginAttemptsDesc = prometheus.NewDesc(
"oubliette_storage_login_attempts_total",
"Total login attempts in storage.",
nil, nil,
)
storageUniqueIPsDesc = prometheus.NewDesc(
"oubliette_storage_unique_ips",
"Unique IPs in storage.",
nil, nil,
)
storageSessionsDesc = prometheus.NewDesc(
"oubliette_storage_sessions_total",
"Total sessions in storage.",
nil, nil,
)
)
func (c *storeCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- storageLoginAttemptsDesc
ch <- storageUniqueIPsDesc
ch <- storageSessionsDesc
}
func (c *storeCollector) Collect(ch chan<- prometheus.Metric) {
stats, err := c.store.GetDashboardStats(context.Background())
if err != nil {
return
}
ch <- prometheus.MustNewConstMetric(storageLoginAttemptsDesc, prometheus.GaugeValue, float64(stats.TotalAttempts))
ch <- prometheus.MustNewConstMetric(storageUniqueIPsDesc, prometheus.GaugeValue, float64(stats.UniqueIPs))
ch <- prometheus.MustNewConstMetric(storageSessionsDesc, prometheus.GaugeValue, float64(stats.TotalSessions))
}