feat: add human detection scoring and webhook notifications
Implement phase 2.1 (human detection) and 2.2 (notifications):

- Detection scorer computes 0.0-1.0 human likelihood from keystroke timing variance, special key usage, typing speed, command diversity, and session duration
- Webhook notifier sends JSON POST to configured endpoints with deduplication, custom headers, and event filtering
- RecordingChannel gains an event callback for feeding keystrokes to the scorer without coupling shell and detection packages
- Server wires scorer into session lifecycle with periodic updates and threshold-based notification triggers
- Web UI shows human score in session tables with highlighting
- New config sections: [detection] and [[notify.webhooks]]

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
259
internal/detection/scorer.go
Normal file
259
internal/detection/scorer.go
Normal file
@@ -0,0 +1,259 @@
|
||||
package detection
|
||||
|
||||
import (
	"math"
	"slices"
	"sync"
	"time"
)
|
||||
|
||||
// DirInput is the RecordEvent direction for data travelling
// client → server (keystrokes).
const DirInput = 0

// DirOutput is the RecordEvent direction for data travelling
// server → client (shell output).
const DirOutput = 1
|
||||
|
||||
// Relative contribution of each signal to the composite score.
// The five weights add up to 1.
const (
	// Variability of the gaps between keystrokes.
	weightTimingVariance = 0.30

	// Use of backspace, tab, escape and control keys.
	weightSpecialKeys = 0.20

	// Typical (median) gap between keystrokes.
	weightTypingSpeed = 0.20

	// Number of distinct command lines entered.
	weightCommandDiversity = 0.15

	// Time elapsed between the first and last input.
	weightSessionDuration = 0.15
)
|
||||
|
||||
// Scorer collects statistics about client input and turns them into a
// human-likelihood score between 0.0 and 1.0.
//
// All fields are guarded by mu. A Scorer must not be copied after first use;
// create one with NewScorer so the command set is initialised.
type Scorer struct {
	mu sync.Mutex

	// Keystroke timing: input timestamps and the accepted gaps between
	// consecutive inputs.
	inputTimes []time.Time
	delays     []time.Duration

	// Number of special (editing/control) key bytes seen.
	specialKeys int

	// Command tracking: the line currently being typed and the set of
	// distinct completed lines.
	currentCmd []byte
	commands   map[string]struct{}

	// Timestamps of the first and the most recent input event.
	firstInput time.Time
	lastInput  time.Time
}
|
||||
|
||||
// NewScorer returns a new Scorer ready to record events.
|
||||
func NewScorer() *Scorer {
|
||||
return &Scorer{
|
||||
commands: make(map[string]struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// RecordEvent records a data event with timestamp and direction.
|
||||
// direction should be DirInput (0) for client input or DirOutput (1) for server output.
|
||||
func (s *Scorer) RecordEvent(ts time.Time, direction int, data []byte) {
|
||||
if direction != DirInput {
|
||||
return // only analyze input
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
if s.firstInput.IsZero() {
|
||||
s.firstInput = ts
|
||||
}
|
||||
s.lastInput = ts
|
||||
|
||||
for _, b := range data {
|
||||
// Track inter-keystroke delay for single-byte inputs.
|
||||
if len(s.inputTimes) > 0 {
|
||||
delay := ts.Sub(s.inputTimes[len(s.inputTimes)-1])
|
||||
if delay > 0 && delay < 30*time.Second {
|
||||
s.delays = append(s.delays, delay)
|
||||
}
|
||||
}
|
||||
s.inputTimes = append(s.inputTimes, ts)
|
||||
|
||||
// Count special keys.
|
||||
if isSpecialKey(b) {
|
||||
s.specialKeys++
|
||||
}
|
||||
|
||||
// Track commands (split on newline/CR).
|
||||
if b == '\r' || b == '\n' {
|
||||
cmd := string(s.currentCmd)
|
||||
if len(cmd) > 0 {
|
||||
s.commands[cmd] = struct{}{}
|
||||
}
|
||||
s.currentCmd = s.currentCmd[:0]
|
||||
} else {
|
||||
// Handle backspace: remove last byte from current command.
|
||||
if b == 0x7f || b == 0x08 {
|
||||
if len(s.currentCmd) > 0 {
|
||||
s.currentCmd = s.currentCmd[:len(s.currentCmd)-1]
|
||||
}
|
||||
} else if b >= 0x20 { // printable
|
||||
s.currentCmd = append(s.currentCmd, b)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Score computes the composite human likelihood score (0.0–1.0).
|
||||
// Thread-safe.
|
||||
func (s *Scorer) Score() float64 {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
if len(s.inputTimes) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
tv := s.timingVarianceScore()
|
||||
sk := s.specialKeysScore()
|
||||
ts := s.typingSpeedScore()
|
||||
cd := s.commandDiversityScore()
|
||||
sd := s.sessionDurationScore()
|
||||
|
||||
score := tv*weightTimingVariance +
|
||||
sk*weightSpecialKeys +
|
||||
ts*weightTypingSpeed +
|
||||
cd*weightCommandDiversity +
|
||||
sd*weightSessionDuration
|
||||
|
||||
return clamp(score, 0, 1)
|
||||
}
|
||||
|
||||
// timingVarianceScore returns 0–1 based on coefficient of variation of inter-key delays.
|
||||
// Bots have CV ≈ 0 (instant or uniform), humans have CV ≥ 0.6.
|
||||
func (s *Scorer) timingVarianceScore() float64 {
|
||||
if len(s.delays) < 3 {
|
||||
return 0
|
||||
}
|
||||
|
||||
mean := meanDuration(s.delays)
|
||||
if mean == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
variance := 0.0
|
||||
for _, d := range s.delays {
|
||||
diff := float64(d) - float64(mean)
|
||||
variance += diff * diff
|
||||
}
|
||||
variance /= float64(len(s.delays))
|
||||
stddev := math.Sqrt(variance)
|
||||
cv := stddev / float64(mean)
|
||||
|
||||
// Map CV to 0–1: CV of 0.6+ is fully human-like.
|
||||
return clamp(cv/0.6, 0, 1)
|
||||
}
|
||||
|
||||
// specialKeysScore returns 0–1 based on count of special key presses.
|
||||
// Scripts almost never generate backspace/tab/ctrl characters.
|
||||
func (s *Scorer) specialKeysScore() float64 {
|
||||
// 5+ special keys → full score.
|
||||
return clamp(float64(s.specialKeys)/5.0, 0, 1)
|
||||
}
|
||||
|
||||
// typingSpeedScore returns 0–1 based on median inter-key delay.
|
||||
// Paste/scripts have < 5ms, humans have 30–300ms.
|
||||
func (s *Scorer) typingSpeedScore() float64 {
|
||||
if len(s.delays) < 2 {
|
||||
return 0
|
||||
}
|
||||
|
||||
med := medianDuration(s.delays)
|
||||
ms := float64(med) / float64(time.Millisecond)
|
||||
|
||||
if ms < 5 {
|
||||
return 0 // paste or script
|
||||
}
|
||||
if ms > 300 {
|
||||
return 0.7 // very slow, still possibly human
|
||||
}
|
||||
if ms >= 30 && ms <= 300 {
|
||||
return 1.0 // human range
|
||||
}
|
||||
// 5–30ms: transition zone
|
||||
return clamp((ms-5)/25, 0, 1)
|
||||
}
|
||||
|
||||
// commandDiversityScore returns 0–1 based on number of unique commands.
|
||||
func (s *Scorer) commandDiversityScore() float64 {
|
||||
// 3+ unique commands → full score.
|
||||
return clamp(float64(len(s.commands))/3.0, 0, 1)
|
||||
}
|
||||
|
||||
// sessionDurationScore returns 0–1 based on active input duration.
|
||||
func (s *Scorer) sessionDurationScore() float64 {
|
||||
if s.firstInput.IsZero() || s.lastInput.IsZero() {
|
||||
return 0
|
||||
}
|
||||
dur := s.lastInput.Sub(s.firstInput)
|
||||
// 10s+ of active input → full score.
|
||||
return clamp(float64(dur)/float64(10*time.Second), 0, 1)
|
||||
}
|
||||
|
||||
// isSpecialKey reports whether b is one of the non-printable bytes that
// interactive users commonly send.
func isSpecialKey(b byte) bool {
	return b == 0x03 || // Ctrl-C
		b == 0x04 || // Ctrl-D
		b == 0x08 || // BS
		b == 0x09 || // TAB
		b == 0x1b || // ESC (arrow keys start with ESC)
		b == 0x7f // DEL (backspace in most terminals)
}
|
||||
|
||||
// clamp limits v to the interval [lo, hi]. The lower bound is checked first,
// so a value below lo yields lo even if lo is greater than hi.
func clamp(v, lo, hi float64) float64 {
	switch {
	case v < lo:
		return lo
	case v > hi:
		return hi
	default:
		return v
	}
}
|
||||
|
||||
// meanDuration returns the arithmetic mean of ds, truncated toward zero by
// integer division. An empty slice yields 0.
func meanDuration(ds []time.Duration) time.Duration {
	n := len(ds)
	if n == 0 {
		return 0
	}

	total := time.Duration(0)
	for i := range ds {
		total += ds[i]
	}
	return total / time.Duration(n)
}
|
||||
|
||||
func medianDuration(ds []time.Duration) time.Duration {
|
||||
n := len(ds)
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
// Copy to avoid mutating the original.
|
||||
sorted := make([]time.Duration, n)
|
||||
copy(sorted, ds)
|
||||
sortDurations(sorted)
|
||||
if n%2 == 0 {
|
||||
return (sorted[n/2-1] + sorted[n/2]) / 2
|
||||
}
|
||||
return sorted[n/2]
|
||||
}
|
||||
|
||||
// sortDurations sorts ds in place in ascending order.
//
// It delegates to the standard library instead of a hand-written insertion
// sort: the delay slice gains an entry for every timed input event and is
// sorted each time a score is computed, so a quadratic sort becomes
// expensive on long or deliberately chatty sessions.
func sortDurations(ds []time.Duration) {
	slices.Sort(ds)
}
|
||||
155
internal/detection/scorer_test.go
Normal file
155
internal/detection/scorer_test.go
Normal file
@@ -0,0 +1,155 @@
|
||||
package detection
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestScorer_EmptyInput(t *testing.T) {
|
||||
s := NewScorer()
|
||||
score := s.Score()
|
||||
if score != 0 {
|
||||
t.Errorf("empty scorer: got %f, want 0", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScorer_SingleKeystroke(t *testing.T) {
|
||||
s := NewScorer()
|
||||
s.RecordEvent(time.Now(), DirInput, []byte("a"))
|
||||
score := s.Score()
|
||||
if score != 0 {
|
||||
t.Errorf("single keystroke: got %f, want 0", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScorer_BotLikeInput(t *testing.T) {
|
||||
// Simulate a bot: paste entire commands with uniform tiny delays, no special keys.
|
||||
s := NewScorer()
|
||||
now := time.Now()
|
||||
|
||||
// Bot pastes "cat /etc/passwd\r" all at once with perfectly uniform timing.
|
||||
for range 3 {
|
||||
cmd := []byte("cat /etc/passwd\r")
|
||||
for _, b := range cmd {
|
||||
s.RecordEvent(now, DirInput, []byte{b})
|
||||
now = now.Add(100 * time.Microsecond) // ~0.1ms uniform delay = paste
|
||||
}
|
||||
}
|
||||
|
||||
score := s.Score()
|
||||
if score >= 0.3 {
|
||||
t.Errorf("bot-like input: got %f, want < 0.3", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScorer_HumanLikeInput(t *testing.T) {
|
||||
// Simulate a human: variable timing, backspaces, diverse commands.
|
||||
s := NewScorer()
|
||||
now := time.Now()
|
||||
|
||||
type cmd struct {
|
||||
text string
|
||||
delay time.Duration // base delay between keys
|
||||
}
|
||||
|
||||
commands := []cmd{
|
||||
{"ls -la\r", 80 * time.Millisecond},
|
||||
{"cat /etc/paswd", 120 * time.Millisecond}, // typo
|
||||
{string([]byte{0x7f}), 200 * time.Millisecond}, // backspace
|
||||
{"wd\r", 90 * time.Millisecond}, // correction
|
||||
{"whoami\r", 100 * time.Millisecond},
|
||||
{"uname -a\r", 150 * time.Millisecond},
|
||||
{string([]byte{0x09}), 300 * time.Millisecond}, // tab completion
|
||||
{"pwd\r", 70 * time.Millisecond},
|
||||
}
|
||||
|
||||
for _, c := range commands {
|
||||
for _, b := range []byte(c.text) {
|
||||
// Add ±30% jitter to make timing more natural.
|
||||
jitter := time.Duration(float64(c.delay) * 0.3)
|
||||
delay := c.delay + jitter // simplified: always add, still variable across commands
|
||||
s.RecordEvent(now, DirInput, []byte{b})
|
||||
now = now.Add(delay)
|
||||
}
|
||||
// Pause between commands (thinking time).
|
||||
now = now.Add(2 * time.Second)
|
||||
}
|
||||
|
||||
score := s.Score()
|
||||
if score <= 0.6 {
|
||||
t.Errorf("human-like input: got %f, want > 0.6", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScorer_OutputIgnored(t *testing.T) {
|
||||
s := NewScorer()
|
||||
now := time.Now()
|
||||
|
||||
// Only output events — should not affect score.
|
||||
for i := 0; i < 100; i++ {
|
||||
s.RecordEvent(now, DirOutput, []byte("some output\n"))
|
||||
now = now.Add(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
score := s.Score()
|
||||
if score != 0 {
|
||||
t.Errorf("output-only: got %f, want 0", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScorer_ThreadSafety(t *testing.T) {
|
||||
s := NewScorer()
|
||||
now := time.Now()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < 10; i++ {
|
||||
wg.Add(1)
|
||||
go func(offset int) {
|
||||
defer wg.Done()
|
||||
for j := 0; j < 100; j++ {
|
||||
ts := now.Add(time.Duration(offset*100+j) * time.Millisecond)
|
||||
s.RecordEvent(ts, DirInput, []byte("a"))
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
// Concurrently read score.
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for i := 0; i < 50; i++ {
|
||||
_ = s.Score()
|
||||
}
|
||||
}()
|
||||
|
||||
wg.Wait()
|
||||
|
||||
// Should not panic; score should be valid.
|
||||
score := s.Score()
|
||||
if score < 0 || score > 1 {
|
||||
t.Errorf("concurrent score out of range: %f", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScorer_CommandDiversity(t *testing.T) {
|
||||
s := NewScorer()
|
||||
now := time.Now()
|
||||
|
||||
// Type 4 different commands with human-ish timing.
|
||||
cmds := []string{"ls\r", "pwd\r", "id\r", "whoami\r"}
|
||||
for _, cmd := range cmds {
|
||||
for _, b := range []byte(cmd) {
|
||||
s.RecordEvent(now, DirInput, []byte{b})
|
||||
now = now.Add(100 * time.Millisecond)
|
||||
}
|
||||
now = now.Add(time.Second)
|
||||
}
|
||||
|
||||
score := s.Score()
|
||||
// With 4 unique commands, human timing, and decent duration,
|
||||
// we should get a meaningful score.
|
||||
if score < 0.4 {
|
||||
t.Errorf("diverse commands: got %f, want >= 0.4", score)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user