feat: add human detection scoring and webhook notifications

Implement phase 2.1 (human detection) and 2.2 (notifications):

- Detection scorer computes 0.0-1.0 human likelihood from keystroke
  timing variance, special key usage, typing speed, command diversity,
  and session duration
- Webhook notifier sends JSON POST to configured endpoints with
  deduplication, custom headers, and event filtering
- RecordingChannel gains an event callback for feeding keystrokes
  to the scorer without coupling shell and detection packages
- Server wires scorer into session lifecycle with periodic updates
  and threshold-based notification triggers
- Web UI shows human score in session tables with highlighting
- New config sections: [detection] and [[notify.webhooks]]

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-14 21:28:11 +01:00
parent 96c8476f77
commit 0ad6f4cb6a
13 changed files with 1060 additions and 32 deletions

View File

@@ -0,0 +1,259 @@
package detection
import (
	"math"
	"slices"
	"sync"
	"time"
)
// Event directions accepted by RecordEvent.
const (
	// DirInput marks data flowing client → server (keystrokes).
	DirInput = iota
	// DirOutput marks data flowing server → client (shell output).
	DirOutput
)
// Per-signal weights applied when combining the individual signal scores
// into the composite score. Together they add up to 1.0.
const (
	weightTimingVariance   = 0.3
	weightSpecialKeys      = 0.2
	weightTypingSpeed      = 0.2
	weightCommandDiversity = 0.15
	weightSessionDuration  = 0.15
)
// Scorer collects keystroke events and derives a 0.0–1.0 human likelihood
// score from several signals. RecordEvent and Score lock mu before touching
// the other fields.
type Scorer struct {
	mu sync.Mutex

	// Timing data: input timestamps and the delays measured between them.
	inputTimes []time.Time
	delays     []time.Duration

	// Number of special key bytes seen (see isSpecialKey).
	specialKeys int

	// Command tracking: the line currently being typed and the set of
	// distinct lines completed with a newline or carriage return.
	currentCmd []byte
	commands   map[string]struct{}

	// Timestamps of the first and the most recently recorded input event.
	firstInput, lastInput time.Time
}
// NewScorer allocates a Scorer whose command set is initialised, so it is
// ready to receive events through RecordEvent.
func NewScorer() *Scorer {
	s := new(Scorer)
	s.commands = map[string]struct{}{}
	return s
}
// RecordEvent records a data event with its timestamp and direction.
// direction should be DirInput (0) for client input or DirOutput (1) for
// server output; events in any direction other than DirInput are ignored.
// The scorer's mutex is held while the event is processed.
//
// For an input event it:
//   - updates the first/last input timestamps,
//   - records the delay since the previous input event when that delay is
//     positive and shorter than 30 seconds,
//   - counts special keys (see isSpecialKey), and
//   - tracks the command line being typed, adding it to the set of unique
//     commands when a CR or LF arrives.
func (s *Scorer) RecordEvent(ts time.Time, direction int, data []byte) {
	if direction != DirInput {
		return // only analyze input
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	if s.firstInput.IsZero() {
		s.firstInput = ts
	}
	s.lastInput = ts

	if len(data) == 0 {
		return // no keystrokes to analyze
	}

	// Every byte of one event carries the same timestamp, so the only
	// delay worth measuring is the gap to the previous event. Non-positive
	// gaps (identical or out-of-order timestamps) and gaps of 30s or more
	// are not recorded.
	if n := len(s.inputTimes); n > 0 {
		if delay := ts.Sub(s.inputTimes[n-1]); delay > 0 && delay < 30*time.Second {
			s.delays = append(s.delays, delay)
		}
	}
	// Only the latest timestamp (and whether any input was seen at all) is
	// ever read back, so keep a single entry instead of one per byte; this
	// stops the slice from growing for the whole life of the session.
	s.inputTimes = append(s.inputTimes[:0], ts)

	// Allow a zero-value Scorer: writing to a nil map would panic.
	if s.commands == nil {
		s.commands = make(map[string]struct{})
	}

	for _, b := range data {
		if isSpecialKey(b) {
			s.specialKeys++
		}

		switch {
		case b == '\r' || b == '\n':
			// End of line: remember the command if it is non-empty.
			if len(s.currentCmd) > 0 {
				s.commands[string(s.currentCmd)] = struct{}{}
			}
			s.currentCmd = s.currentCmd[:0]
		case b == 0x7f || b == 0x08:
			// Backspace: drop the last byte of the current command.
			if len(s.currentCmd) > 0 {
				s.currentCmd = s.currentCmd[:len(s.currentCmd)-1]
			}
		case b >= 0x20:
			// Printable byte.
			// NOTE(review): the bytes that follow ESC in an escape
			// sequence (e.g. "[A" for an arrow key) are >= 0x20 and are
			// therefore appended to the command as well.
			s.currentCmd = append(s.currentCmd, b)
		}
	}
}
// Score returns the composite human likelihood score (0.0–1.0): the
// weighted sum of the individual signal scores, clamped to that range.
// It returns 0 while no input has been recorded.
// Thread-safe.
func (s *Scorer) Score() float64 {
	s.mu.Lock()
	defer s.mu.Unlock()

	if len(s.inputTimes) == 0 {
		return 0
	}

	signals := [...]struct{ value, weight float64 }{
		{s.timingVarianceScore(), weightTimingVariance},
		{s.specialKeysScore(), weightSpecialKeys},
		{s.typingSpeedScore(), weightTypingSpeed},
		{s.commandDiversityScore(), weightCommandDiversity},
		{s.sessionDurationScore(), weightSessionDuration},
	}

	var total float64
	for _, sig := range signals {
		total += sig.value * sig.weight
	}
	return clamp(total, 0, 1)
}
// timingVarianceScore returns 0–1 based on the coefficient of variation (CV,
// population standard deviation divided by mean) of the inter-key delays.
// Bots have CV ≈ 0 (instant or uniform), humans have CV ≥ 0.6.
// Fewer than three delays, or a zero mean, yield 0.
func (s *Scorer) timingVarianceScore() float64 {
	n := len(s.delays)
	if n < 3 {
		return 0
	}

	avg := meanDuration(s.delays)
	if avg == 0 {
		return 0
	}
	mu := float64(avg)

	var sumSq float64
	for _, d := range s.delays {
		dev := float64(d) - mu
		sumSq += dev * dev
	}
	sigma := math.Sqrt(sumSq / float64(n))

	// Map CV to 0–1: a CV of 0.6 or more counts as fully human-like.
	return clamp(sigma/mu/0.6, 0, 1)
}
// specialKeysScore returns 0–1 based on the number of special key presses.
// Scripts almost never generate backspace/tab/ctrl characters.
func (s *Scorer) specialKeysScore() float64 {
	// Reaching this many special keys gives the full score.
	const fullScoreAt = 5.0
	return clamp(float64(s.specialKeys)/fullScoreAt, 0, 1)
}
// typingSpeedScore returns 0–1 based on the median inter-key delay.
// Paste/scripts have < 5ms, humans have 30–300ms.
// Fewer than two delays yield 0.
func (s *Scorer) typingSpeedScore() float64 {
	if len(s.delays) < 2 {
		return 0
	}

	ms := float64(medianDuration(s.delays)) / float64(time.Millisecond)
	switch {
	case ms < 5:
		return 0 // paste or script
	case ms > 300:
		return 0.7 // very slow, still possibly human
	case ms >= 30:
		return 1.0 // human range (30–300ms)
	default:
		// 5–30ms: transition zone, rising linearly from 0 to 1.
		return clamp((ms-5)/25, 0, 1)
	}
}
// commandDiversityScore returns 0–1 based on the number of unique commands
// recorded so far; three or more unique commands give the full score.
func (s *Scorer) commandDiversityScore() float64 {
	unique := len(s.commands)
	return clamp(float64(unique)/3.0, 0, 1)
}
// sessionDurationScore returns 0–1 based on the span between the first and
// the last recorded input event; 10 seconds or more give the full score.
// It returns 0 when either timestamp is unset, and the clamp maps a
// negative span to 0 as well.
func (s *Scorer) sessionDurationScore() float64 {
	const fullScoreAfter = 10 * time.Second

	if s.firstInput.IsZero() || s.lastInput.IsZero() {
		return 0
	}
	active := s.lastInput.Sub(s.firstInput)
	return clamp(float64(active)/float64(fullScoreAfter), 0, 1)
}
// isSpecialKey reports whether b is one of the non-printable keys that
// humans commonly use.
func isSpecialKey(b byte) bool {
	return b == 0x7f || // DEL (backspace in most terminals)
		b == 0x08 || // BS
		b == 0x09 || // TAB
		b == 0x03 || // Ctrl-C
		b == 0x04 || // Ctrl-D
		b == 0x1b // ESC (arrow keys start with ESC)
}
// clamp limits v to the closed interval [lo, hi]: values below lo become
// lo, values above hi become hi, anything else is returned unchanged.
func clamp(v, lo, hi float64) float64 {
	switch {
	case v < lo:
		return lo
	case v > hi:
		return hi
	default:
		return v
	}
}
// meanDuration returns the arithmetic mean of ds using integer division
// (the result is truncated toward zero). An empty slice yields 0.
func meanDuration(ds []time.Duration) time.Duration {
	n := len(ds)
	if n == 0 {
		return 0
	}

	var total int64
	for i := range ds {
		total += int64(ds[i])
	}
	return time.Duration(total / int64(n))
}
// medianDuration returns the median of ds without modifying ds. For an even
// number of elements it returns the integer average of the two middle
// values. An empty slice yields 0.
func medianDuration(ds []time.Duration) time.Duration {
	if len(ds) == 0 {
		return 0
	}

	// Sort a private copy so the caller's slice keeps its order.
	sorted := append([]time.Duration(nil), ds...)
	sortDurations(sorted)

	mid := len(sorted) / 2
	if len(sorted)%2 == 1 {
		return sorted[mid]
	}
	return (sorted[mid-1] + sorted[mid]) / 2
}
// sortDurations sorts ds in place in ascending order.
//
// It delegates to slices.Sort rather than a hand-written insertion sort:
// the delay slice gains an entry per input event and is sorted on every
// Score call, so a quadratic sort becomes expensive for long sessions.
func sortDurations(ds []time.Duration) {
	slices.Sort(ds)
}

View File

@@ -0,0 +1,155 @@
package detection
import (
"sync"
"testing"
"time"
)
// TestScorer_EmptyInput checks that a scorer with no recorded events
// reports a score of exactly 0.
func TestScorer_EmptyInput(t *testing.T) {
	if got := NewScorer().Score(); got != 0 {
		t.Errorf("empty scorer: got %f, want 0", got)
	}
}
// TestScorer_SingleKeystroke checks that one isolated keystroke is not
// enough to produce a non-zero score.
func TestScorer_SingleKeystroke(t *testing.T) {
	scorer := NewScorer()
	scorer.RecordEvent(time.Now(), DirInput, []byte{'a'})

	if got := scorer.Score(); got != 0 {
		t.Errorf("single keystroke: got %f, want 0", got)
	}
}
// TestScorer_BotLikeInput simulates a bot that sends the same command three
// times, byte by byte, with a uniform 0.1ms gap and no special keys, and
// expects a score below 0.3.
func TestScorer_BotLikeInput(t *testing.T) {
	const (
		pasted   = "cat /etc/passwd\r"
		repeats  = 3
		keyDelay = 100 * time.Microsecond // ~0.1ms uniform delay = paste
	)

	scorer := NewScorer()
	clock := time.Now()
	for rep := 0; rep < repeats; rep++ {
		for i := 0; i < len(pasted); i++ {
			scorer.RecordEvent(clock, DirInput, []byte{pasted[i]})
			clock = clock.Add(keyDelay)
		}
	}

	if got := scorer.Score(); got >= 0.3 {
		t.Errorf("bot-like input: got %f, want < 0.3", got)
	}
}
// TestScorer_HumanLikeInput feeds the scorer a typed session with key delays
// that differ per command, a backspace correction, a tab completion and a
// pause after each command, and expects a score above 0.6.
func TestScorer_HumanLikeInput(t *testing.T) {
	// Pause between commands (thinking time).
	const thinkTime = 2 * time.Second

	steps := []struct {
		keys  string
		pause time.Duration // base delay between keys
	}{
		{"ls -la\r", 80 * time.Millisecond},
		{"cat /etc/paswd", 120 * time.Millisecond}, // typo
		{"\x7f", 200 * time.Millisecond},           // backspace
		{"wd\r", 90 * time.Millisecond},            // correction
		{"whoami\r", 100 * time.Millisecond},
		{"uname -a\r", 150 * time.Millisecond},
		{"\t", 300 * time.Millisecond}, // tab completion
		{"pwd\r", 70 * time.Millisecond},
	}

	scorer := NewScorer()
	clock := time.Now()
	for _, step := range steps {
		// Each key gap is the base delay plus a fixed 30% of it; the gap is
		// constant within a command but varies from command to command.
		gap := step.pause + time.Duration(float64(step.pause)*0.3)
		for i := 0; i < len(step.keys); i++ {
			scorer.RecordEvent(clock, DirInput, []byte{step.keys[i]})
			clock = clock.Add(gap)
		}
		clock = clock.Add(thinkTime)
	}

	if got := scorer.Score(); got <= 0.6 {
		t.Errorf("human-like input: got %f, want > 0.6", got)
	}
}
// TestScorer_OutputIgnored records only server-to-client events and checks
// that they leave the score at 0.
func TestScorer_OutputIgnored(t *testing.T) {
	const (
		events = 100
		step   = 10 * time.Millisecond
	)

	scorer := NewScorer()
	clock := time.Now()
	for range events {
		scorer.RecordEvent(clock, DirOutput, []byte("some output\n"))
		clock = clock.Add(step)
	}

	if got := scorer.Score(); got != 0 {
		t.Errorf("output-only: got %f, want 0", got)
	}
}
// TestScorer_ThreadSafety records events from ten goroutines while another
// goroutine reads the score, then checks that the final score lies within
// [0, 1]. It is meant to surface panics and, under -race, data races.
func TestScorer_ThreadSafety(t *testing.T) {
	const (
		writers         = 10
		eventsPerWriter = 100
		scoreReads      = 50
	)

	scorer := NewScorer()
	base := time.Now()

	var wg sync.WaitGroup
	wg.Add(writers + 1) // all writers plus the single reader

	writer := func(id int) {
		defer wg.Done()
		for j := 0; j < eventsPerWriter; j++ {
			at := base.Add(time.Duration(id*100+j) * time.Millisecond)
			scorer.RecordEvent(at, DirInput, []byte("a"))
		}
	}
	for id := 0; id < writers; id++ {
		go writer(id)
	}

	// Concurrently read the score.
	go func() {
		defer wg.Done()
		for i := 0; i < scoreReads; i++ {
			_ = scorer.Score()
		}
	}()

	wg.Wait()

	// Should not panic; score should be valid.
	if got := scorer.Score(); got < 0 || got > 1 {
		t.Errorf("concurrent score out of range: %f", got)
	}
}
// TestScorer_CommandDiversity types four different commands with a 100ms key
// gap and a one-second pause after each, and expects a score of at least 0.4.
func TestScorer_CommandDiversity(t *testing.T) {
	const (
		keyGap     = 100 * time.Millisecond
		afterEnter = time.Second
	)

	scorer := NewScorer()
	clock := time.Now()
	for _, line := range [...]string{"ls\r", "pwd\r", "id\r", "whoami\r"} {
		for i := 0; i < len(line); i++ {
			scorer.RecordEvent(clock, DirInput, []byte{line[i]})
			clock = clock.Add(keyGap)
		}
		clock = clock.Add(afterEnter)
	}

	// With 4 unique commands, human timing, and decent duration,
	// we should get a meaningful score.
	if got := scorer.Score(); got < 0.4 {
		t.Errorf("diverse commands: got %f, want >= 0.4", got)
	}
}