feat: add lab-monitoring MCP server for Prometheus and Alertmanager
New MCP server that queries live Prometheus and Alertmanager HTTP APIs with 8 tools: list_alerts, get_alert, search_metrics, get_metric_metadata, query (PromQL), list_targets, list_silences, and create_silence. Extends the MCP core with ModeCustom and NewGenericServer for servers that don't require a database. Includes CLI with direct commands (alerts, query, targets, metrics), NixOS module, and comprehensive httptest-based tests. Bumps existing binaries to 0.2.1 due to shared internal/mcp change. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
13
.mcp.json
13
.mcp.json
@@ -25,6 +25,19 @@
|
|||||||
"env": {
|
"env": {
|
||||||
"NIXPKGS_SEARCH_DATABASE": "sqlite://:memory:"
|
"NIXPKGS_SEARCH_DATABASE": "sqlite://:memory:"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"lab-monitoring": {
|
||||||
|
"command": "nix",
|
||||||
|
"args": [
|
||||||
|
"run",
|
||||||
|
".#lab-monitoring",
|
||||||
|
"--",
|
||||||
|
"serve"
|
||||||
|
],
|
||||||
|
"env": {
|
||||||
|
"PROMETHEUS_URL": "http://localhost:9090",
|
||||||
|
"ALERTMANAGER_URL": "http://localhost:9093"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
68
CLAUDE.md
68
CLAUDE.md
@@ -20,7 +20,12 @@ Search and query NixOS configuration options. Uses nixpkgs as source.
|
|||||||
### Home Manager Options (`hm-options`)
|
### Home Manager Options (`hm-options`)
|
||||||
Search and query Home Manager configuration options. Uses home-manager repository as source.
|
Search and query Home Manager configuration options. Uses home-manager repository as source.
|
||||||
|
|
||||||
All servers share the same architecture:
|
### Lab Monitoring (`lab-monitoring`)
|
||||||
|
Query Prometheus metrics and Alertmanager alerts. Unlike other servers, this queries live HTTP APIs — no database or indexing needed.
|
||||||
|
- 8 tools: list/get alerts, search metrics, get metadata, PromQL query, list targets, list/create silences
|
||||||
|
- Configurable Prometheus and Alertmanager URLs via flags or environment variables
|
||||||
|
|
||||||
|
The nixpkgs/options/hm servers share a database-backed architecture:
|
||||||
- Full-text search across option/package names and descriptions
|
- Full-text search across option/package names and descriptions
|
||||||
- Query specific options/packages with full metadata
|
- Query specific options/packages with full metadata
|
||||||
- Index multiple revisions (by git hash or channel name)
|
- Index multiple revisions (by git hash or channel name)
|
||||||
@@ -38,8 +43,9 @@ All servers share the same architecture:
|
|||||||
## Project Status
|
## Project Status
|
||||||
|
|
||||||
**Complete and maintained** - All core features implemented:
|
**Complete and maintained** - All core features implemented:
|
||||||
- Full MCP servers with 6 tools each
|
- Full MCP servers (6 tools each for nixpkgs/options, 8 tools for monitoring)
|
||||||
- PostgreSQL and SQLite backends with FTS
|
- PostgreSQL and SQLite backends with FTS (for nixpkgs/options servers)
|
||||||
|
- Live API queries for Prometheus/Alertmanager (monitoring server)
|
||||||
- NixOS modules for deployment
|
- NixOS modules for deployment
|
||||||
- CLI for manual operations
|
- CLI for manual operations
|
||||||
- Comprehensive test suite
|
- Comprehensive test suite
|
||||||
@@ -53,8 +59,10 @@ labmcp/
|
|||||||
│ │ └── main.go # Combined options+packages CLI (primary)
|
│ │ └── main.go # Combined options+packages CLI (primary)
|
||||||
│ ├── nixos-options/
|
│ ├── nixos-options/
|
||||||
│ │ └── main.go # NixOS options CLI (legacy)
|
│ │ └── main.go # NixOS options CLI (legacy)
|
||||||
│ └── hm-options/
|
│ ├── hm-options/
|
||||||
│ └── main.go # Home Manager options CLI
|
│ │ └── main.go # Home Manager options CLI
|
||||||
|
│ └── lab-monitoring/
|
||||||
|
│ └── main.go # Prometheus/Alertmanager CLI
|
||||||
├── internal/
|
├── internal/
|
||||||
│ ├── database/
|
│ ├── database/
|
||||||
│ │ ├── interface.go # Store interface (options + packages)
|
│ │ ├── interface.go # Store interface (options + packages)
|
||||||
@@ -82,14 +90,22 @@ labmcp/
|
|||||||
│ │ ├── indexer.go # Home Manager indexing
|
│ │ ├── indexer.go # Home Manager indexing
|
||||||
│ │ ├── types.go # Channel aliases, extensions
|
│ │ ├── types.go # Channel aliases, extensions
|
||||||
│ │ └── *_test.go # Indexer tests
|
│ │ └── *_test.go # Indexer tests
|
||||||
│ └── packages/
|
│ ├── packages/
|
||||||
│ ├── indexer.go # Nix packages indexing
|
│ │ ├── indexer.go # Nix packages indexing
|
||||||
│ ├── parser.go # nix-env JSON parsing
|
│ │ ├── parser.go # nix-env JSON parsing
|
||||||
│ ├── types.go # Package types, channel aliases
|
│ │ ├── types.go # Package types, channel aliases
|
||||||
│ └── *_test.go # Parser tests
|
│ │ └── *_test.go # Parser tests
|
||||||
|
│ └── monitoring/
|
||||||
|
│ ├── types.go # Prometheus/Alertmanager API types
|
||||||
|
│ ├── prometheus.go # Prometheus HTTP client
|
||||||
|
│ ├── alertmanager.go # Alertmanager HTTP client
|
||||||
|
│ ├── handlers.go # MCP tool definitions + handlers
|
||||||
|
│ ├── format.go # Markdown formatting utilities
|
||||||
|
│ └── *_test.go # Tests (httptest-based)
|
||||||
├── nix/
|
├── nix/
|
||||||
│ ├── module.nix # NixOS module for nixos-options
|
│ ├── module.nix # NixOS module for nixos-options
|
||||||
│ ├── hm-options-module.nix # NixOS module for hm-options
|
│ ├── hm-options-module.nix # NixOS module for hm-options
|
||||||
|
│ ├── lab-monitoring-module.nix # NixOS module for lab-monitoring
|
||||||
│ └── package.nix # Parameterized Nix package
|
│ └── package.nix # Parameterized Nix package
|
||||||
├── testdata/
|
├── testdata/
|
||||||
│ └── options-sample.json # Test fixture
|
│ └── options-sample.json # Test fixture
|
||||||
@@ -124,6 +140,19 @@ labmcp/
|
|||||||
| `list_revisions` | List all indexed revisions |
|
| `list_revisions` | List all indexed revisions |
|
||||||
| `delete_revision` | Delete an indexed revision |
|
| `delete_revision` | Delete an indexed revision |
|
||||||
|
|
||||||
|
### Monitoring Server (lab-monitoring)
|
||||||
|
|
||||||
|
| Tool | Description |
|
||||||
|
|------|-------------|
|
||||||
|
| `list_alerts` | List alerts with optional filters (state, severity, receiver) |
|
||||||
|
| `get_alert` | Get full details for a specific alert by fingerprint |
|
||||||
|
| `search_metrics` | Search metric names with substring filter, enriched with metadata |
|
||||||
|
| `get_metric_metadata` | Get type, help text, and unit for a specific metric |
|
||||||
|
| `query` | Execute instant PromQL query |
|
||||||
|
| `list_targets` | List scrape targets with health status |
|
||||||
|
| `list_silences` | List active/pending silences |
|
||||||
|
| `create_silence` | Create a silence (confirms with user first) |
|
||||||
|
|
||||||
## Key Implementation Details
|
## Key Implementation Details
|
||||||
|
|
||||||
### Database
|
### Database
|
||||||
@@ -212,6 +241,17 @@ hm-options delete <revision> # Delete indexed revision
|
|||||||
hm-options --version # Show version
|
hm-options --version # Show version
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### lab-monitoring
|
||||||
|
```bash
|
||||||
|
lab-monitoring serve # Run MCP server on STDIO
|
||||||
|
lab-monitoring serve --transport http # Run MCP server on HTTP
|
||||||
|
lab-monitoring alerts # List alerts
|
||||||
|
lab-monitoring alerts --state active # Filter by state
|
||||||
|
lab-monitoring query 'up' # Instant PromQL query
|
||||||
|
lab-monitoring targets # List scrape targets
|
||||||
|
lab-monitoring metrics node # Search metric names
|
||||||
|
```
|
||||||
|
|
||||||
### Channel Aliases
|
### Channel Aliases
|
||||||
|
|
||||||
**nixpkgs-search/nixos-options**: `nixos-unstable`, `nixos-stable`, `nixos-24.11`, `nixos-24.05`, etc.
|
**nixpkgs-search/nixos-options**: `nixos-unstable`, `nixos-stable`, `nixos-24.11`, `nixos-24.05`, etc.
|
||||||
@@ -220,6 +260,9 @@ hm-options --version # Show version
|
|||||||
|
|
||||||
## Notes for Claude
|
## Notes for Claude
|
||||||
|
|
||||||
|
### Planning
|
||||||
|
When creating implementation plans, the first step should usually be to **checkout an appropriately named feature branch** (e.g., `git checkout -b feature/lab-monitoring`). This keeps work isolated and makes PRs cleaner.
|
||||||
|
|
||||||
### Development Workflow
|
### Development Workflow
|
||||||
- **Always run `go fmt ./...` before committing Go code**
|
- **Always run `go fmt ./...` before committing Go code**
|
||||||
- **Run Go commands using `nix develop -c`** (e.g., `nix develop -c go test ./...`)
|
- **Run Go commands using `nix develop -c`** (e.g., `nix develop -c go test ./...`)
|
||||||
@@ -257,7 +300,8 @@ Version is defined in multiple places that must stay in sync:
|
|||||||
- `cmd/nixpkgs-search/main.go`
|
- `cmd/nixpkgs-search/main.go`
|
||||||
- `cmd/nixos-options/main.go`
|
- `cmd/nixos-options/main.go`
|
||||||
- `cmd/hm-options/main.go`
|
- `cmd/hm-options/main.go`
|
||||||
- `internal/mcp/server.go` (in `DefaultNixOSConfig`, `DefaultHomeManagerConfig`, `DefaultNixpkgsPackagesConfig`)
|
- `cmd/lab-monitoring/main.go`
|
||||||
|
- `internal/mcp/server.go` (in `DefaultNixOSConfig`, `DefaultHomeManagerConfig`, `DefaultNixpkgsPackagesConfig`, `DefaultMonitoringConfig`)
|
||||||
- `nix/package.nix`
|
- `nix/package.nix`
|
||||||
|
|
||||||
### User Preferences
|
### User Preferences
|
||||||
@@ -284,6 +328,7 @@ nix develop -c go test -bench=. -benchtime=1x -timeout=30m ./internal/homemanage
|
|||||||
nix build .#nixpkgs-search
|
nix build .#nixpkgs-search
|
||||||
nix build .#nixos-options
|
nix build .#nixos-options
|
||||||
nix build .#hm-options
|
nix build .#hm-options
|
||||||
|
nix build .#lab-monitoring
|
||||||
|
|
||||||
# Run directly
|
# Run directly
|
||||||
nix run .#nixpkgs-search -- options serve
|
nix run .#nixpkgs-search -- options serve
|
||||||
@@ -291,6 +336,7 @@ nix run .#nixpkgs-search -- packages serve
|
|||||||
nix run .#nixpkgs-search -- index nixos-unstable
|
nix run .#nixpkgs-search -- index nixos-unstable
|
||||||
nix run .#hm-options -- serve
|
nix run .#hm-options -- serve
|
||||||
nix run .#hm-options -- index hm-unstable
|
nix run .#hm-options -- index hm-unstable
|
||||||
|
nix run .#lab-monitoring -- serve
|
||||||
```
|
```
|
||||||
|
|
||||||
### Indexing Performance
|
### Indexing Performance
|
||||||
|
|||||||
1
TODO.md
1
TODO.md
@@ -15,6 +15,7 @@
|
|||||||
## New MCP Servers
|
## New MCP Servers
|
||||||
|
|
||||||
- [x] `nixpkgs-packages` - Index and search nixpkgs packages (implemented in `nixpkgs-search packages`)
|
- [x] `nixpkgs-packages` - Index and search nixpkgs packages (implemented in `nixpkgs-search packages`)
|
||||||
|
- [x] `lab-monitoring` - Query Prometheus and Alertmanager APIs (8 tools, no database required)
|
||||||
|
|
||||||
## Nice to Have
|
## Nice to Have
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
defaultDatabase = "sqlite://hm-options.db"
|
defaultDatabase = "sqlite://hm-options.db"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|||||||
369
cmd/lab-monitoring/main.go
Normal file
369
cmd/lab-monitoring/main.go
Normal file
@@ -0,0 +1,369 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/urfave/cli/v2"
|
||||||
|
|
||||||
|
"git.t-juice.club/torjus/labmcp/internal/mcp"
|
||||||
|
"git.t-juice.club/torjus/labmcp/internal/monitoring"
|
||||||
|
)
|
||||||
|
|
||||||
|
const version = "0.1.0"
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
app := &cli.App{
|
||||||
|
Name: "lab-monitoring",
|
||||||
|
Usage: "MCP server for Prometheus and Alertmanager monitoring",
|
||||||
|
Version: version,
|
||||||
|
Flags: []cli.Flag{
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "prometheus-url",
|
||||||
|
Usage: "Prometheus base URL",
|
||||||
|
EnvVars: []string{"PROMETHEUS_URL"},
|
||||||
|
Value: "http://localhost:9090",
|
||||||
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "alertmanager-url",
|
||||||
|
Usage: "Alertmanager base URL",
|
||||||
|
EnvVars: []string{"ALERTMANAGER_URL"},
|
||||||
|
Value: "http://localhost:9093",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Commands: []*cli.Command{
|
||||||
|
serveCommand(),
|
||||||
|
alertsCommand(),
|
||||||
|
queryCommand(),
|
||||||
|
targetsCommand(),
|
||||||
|
metricsCommand(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := app.Run(os.Args); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func serveCommand() *cli.Command {
|
||||||
|
return &cli.Command{
|
||||||
|
Name: "serve",
|
||||||
|
Usage: "Run MCP server for lab monitoring",
|
||||||
|
Flags: []cli.Flag{
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "transport",
|
||||||
|
Aliases: []string{"t"},
|
||||||
|
Usage: "Transport type: 'stdio' or 'http'",
|
||||||
|
Value: "stdio",
|
||||||
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "http-address",
|
||||||
|
Usage: "HTTP listen address",
|
||||||
|
Value: "127.0.0.1:8084",
|
||||||
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "http-endpoint",
|
||||||
|
Usage: "HTTP endpoint path",
|
||||||
|
Value: "/mcp",
|
||||||
|
},
|
||||||
|
&cli.StringSliceFlag{
|
||||||
|
Name: "allowed-origins",
|
||||||
|
Usage: "Allowed Origin headers for CORS",
|
||||||
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "tls-cert",
|
||||||
|
Usage: "TLS certificate file",
|
||||||
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "tls-key",
|
||||||
|
Usage: "TLS key file",
|
||||||
|
},
|
||||||
|
&cli.DurationFlag{
|
||||||
|
Name: "session-ttl",
|
||||||
|
Usage: "Session TTL for HTTP transport",
|
||||||
|
Value: 30 * time.Minute,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Action: func(c *cli.Context) error {
|
||||||
|
return runServe(c)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func alertsCommand() *cli.Command {
|
||||||
|
return &cli.Command{
|
||||||
|
Name: "alerts",
|
||||||
|
Usage: "List alerts from Alertmanager",
|
||||||
|
Flags: []cli.Flag{
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "state",
|
||||||
|
Usage: "Filter by state: active, suppressed, unprocessed",
|
||||||
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "severity",
|
||||||
|
Usage: "Filter by severity label",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Action: func(c *cli.Context) error {
|
||||||
|
return runAlerts(c)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func queryCommand() *cli.Command {
|
||||||
|
return &cli.Command{
|
||||||
|
Name: "query",
|
||||||
|
Usage: "Execute an instant PromQL query",
|
||||||
|
ArgsUsage: "<promql>",
|
||||||
|
Action: func(c *cli.Context) error {
|
||||||
|
if c.NArg() < 1 {
|
||||||
|
return fmt.Errorf("promql expression required")
|
||||||
|
}
|
||||||
|
return runQuery(c, c.Args().First())
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func targetsCommand() *cli.Command {
|
||||||
|
return &cli.Command{
|
||||||
|
Name: "targets",
|
||||||
|
Usage: "List scrape targets",
|
||||||
|
Action: func(c *cli.Context) error {
|
||||||
|
return runTargets(c)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func metricsCommand() *cli.Command {
|
||||||
|
return &cli.Command{
|
||||||
|
Name: "metrics",
|
||||||
|
Usage: "Search metric names",
|
||||||
|
ArgsUsage: "<search>",
|
||||||
|
Flags: []cli.Flag{
|
||||||
|
&cli.IntFlag{
|
||||||
|
Name: "limit",
|
||||||
|
Aliases: []string{"n"},
|
||||||
|
Usage: "Maximum number of results",
|
||||||
|
Value: 50,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Action: func(c *cli.Context) error {
|
||||||
|
query := ""
|
||||||
|
if c.NArg() > 0 {
|
||||||
|
query = c.Args().First()
|
||||||
|
}
|
||||||
|
return runMetrics(c, query)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runServe(c *cli.Context) error {
|
||||||
|
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
logger := log.New(os.Stderr, "[mcp] ", log.LstdFlags)
|
||||||
|
config := mcp.DefaultMonitoringConfig()
|
||||||
|
server := mcp.NewGenericServer(logger, config)
|
||||||
|
|
||||||
|
prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
|
||||||
|
am := monitoring.NewAlertmanagerClient(c.String("alertmanager-url"))
|
||||||
|
monitoring.RegisterHandlers(server, prom, am)
|
||||||
|
|
||||||
|
transport := c.String("transport")
|
||||||
|
switch transport {
|
||||||
|
case "stdio":
|
||||||
|
logger.Println("Starting lab-monitoring MCP server on stdio...")
|
||||||
|
return server.Run(ctx, os.Stdin, os.Stdout)
|
||||||
|
|
||||||
|
case "http":
|
||||||
|
httpConfig := mcp.HTTPConfig{
|
||||||
|
Address: c.String("http-address"),
|
||||||
|
Endpoint: c.String("http-endpoint"),
|
||||||
|
AllowedOrigins: c.StringSlice("allowed-origins"),
|
||||||
|
SessionTTL: c.Duration("session-ttl"),
|
||||||
|
TLSCertFile: c.String("tls-cert"),
|
||||||
|
TLSKeyFile: c.String("tls-key"),
|
||||||
|
}
|
||||||
|
httpTransport := mcp.NewHTTPTransport(server, httpConfig)
|
||||||
|
return httpTransport.Run(ctx)
|
||||||
|
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unknown transport: %s (use 'stdio' or 'http')", transport)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runAlerts(c *cli.Context) error {
|
||||||
|
ctx := context.Background()
|
||||||
|
am := monitoring.NewAlertmanagerClient(c.String("alertmanager-url"))
|
||||||
|
|
||||||
|
filters := monitoring.AlertFilters{}
|
||||||
|
if state := c.String("state"); state != "" {
|
||||||
|
switch state {
|
||||||
|
case "active":
|
||||||
|
active := true
|
||||||
|
filters.Active = &active
|
||||||
|
silenced := false
|
||||||
|
filters.Silenced = &silenced
|
||||||
|
inhibited := false
|
||||||
|
filters.Inhibited = &inhibited
|
||||||
|
case "suppressed":
|
||||||
|
active := false
|
||||||
|
filters.Active = &active
|
||||||
|
case "unprocessed":
|
||||||
|
unprocessed := true
|
||||||
|
filters.Unprocessed = &unprocessed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if severity := c.String("severity"); severity != "" {
|
||||||
|
filters.Filter = append(filters.Filter, fmt.Sprintf(`severity="%s"`, severity))
|
||||||
|
}
|
||||||
|
|
||||||
|
alerts, err := am.ListAlerts(ctx, filters)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to list alerts: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(alerts) == 0 {
|
||||||
|
fmt.Println("No alerts found.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, a := range alerts {
|
||||||
|
state := a.Status.State
|
||||||
|
severity := a.Labels["severity"]
|
||||||
|
name := a.Labels["alertname"]
|
||||||
|
fmt.Printf("[%s] %s (severity=%s, fingerprint=%s)\n", state, name, severity, a.Fingerprint)
|
||||||
|
for k, v := range a.Annotations {
|
||||||
|
fmt.Printf(" %s: %s\n", k, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runQuery(c *cli.Context, promql string) error {
|
||||||
|
ctx := context.Background()
|
||||||
|
prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
|
||||||
|
|
||||||
|
data, err := prom.Query(ctx, promql, time.Time{})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("query failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, r := range data.Result {
|
||||||
|
labels := ""
|
||||||
|
for k, v := range r.Metric {
|
||||||
|
if labels != "" {
|
||||||
|
labels += ", "
|
||||||
|
}
|
||||||
|
labels += fmt.Sprintf("%s=%q", k, v)
|
||||||
|
}
|
||||||
|
value := ""
|
||||||
|
if len(r.Value) >= 2 {
|
||||||
|
if v, ok := r.Value[1].(string); ok {
|
||||||
|
value = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Printf("{%s} %s\n", labels, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runTargets(c *cli.Context) error {
|
||||||
|
ctx := context.Background()
|
||||||
|
prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
|
||||||
|
|
||||||
|
data, err := prom.Targets(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to fetch targets: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(data.ActiveTargets) == 0 {
|
||||||
|
fmt.Println("No active targets.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, t := range data.ActiveTargets {
|
||||||
|
job := t.Labels["job"]
|
||||||
|
instance := t.Labels["instance"]
|
||||||
|
fmt.Printf("[%s] %s/%s (last scrape: %s, duration: %.3fs)\n",
|
||||||
|
t.Health, job, instance, t.LastScrape.Format("15:04:05"), t.LastScrapeDuration)
|
||||||
|
if t.LastError != "" {
|
||||||
|
fmt.Printf(" error: %s\n", t.LastError)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runMetrics(c *cli.Context, query string) error {
|
||||||
|
ctx := context.Background()
|
||||||
|
prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
|
||||||
|
|
||||||
|
names, err := prom.LabelValues(ctx, "__name__")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to fetch metric names: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
limit := c.Int("limit")
|
||||||
|
count := 0
|
||||||
|
for _, name := range names {
|
||||||
|
if query != "" {
|
||||||
|
// Simple case-insensitive substring match
|
||||||
|
if !containsIgnoreCase(name, query) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Println(name)
|
||||||
|
count++
|
||||||
|
if count >= limit {
|
||||||
|
fmt.Printf("... (showing %d of matching metrics, use --limit to see more)\n", limit)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if count == 0 {
|
||||||
|
fmt.Printf("No metrics found matching '%s'\n", query)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func containsIgnoreCase(s, substr string) bool {
|
||||||
|
sLower := make([]byte, len(s))
|
||||||
|
subLower := make([]byte, len(substr))
|
||||||
|
for i := range s {
|
||||||
|
if s[i] >= 'A' && s[i] <= 'Z' {
|
||||||
|
sLower[i] = s[i] + 32
|
||||||
|
} else {
|
||||||
|
sLower[i] = s[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := range substr {
|
||||||
|
if substr[i] >= 'A' && substr[i] <= 'Z' {
|
||||||
|
subLower[i] = substr[i] + 32
|
||||||
|
} else {
|
||||||
|
subLower[i] = substr[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i <= len(sLower)-len(subLower); i++ {
|
||||||
|
match := true
|
||||||
|
for j := range subLower {
|
||||||
|
if sLower[i+j] != subLower[j] {
|
||||||
|
match = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if match {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
@@ -19,7 +19,7 @@ import (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
defaultDatabase = "sqlite://nixos-options.db"
|
defaultDatabase = "sqlite://nixos-options.db"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
defaultDatabase = "sqlite://nixpkgs-search.db"
|
defaultDatabase = "sqlite://nixpkgs-search.db"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|||||||
11
flake.nix
11
flake.nix
@@ -33,6 +33,13 @@
|
|||||||
mainProgram = "nixpkgs-search";
|
mainProgram = "nixpkgs-search";
|
||||||
description = "Search nixpkgs options and packages";
|
description = "Search nixpkgs options and packages";
|
||||||
};
|
};
|
||||||
|
lab-monitoring = pkgs.callPackage ./nix/package.nix {
|
||||||
|
src = ./.;
|
||||||
|
pname = "lab-monitoring";
|
||||||
|
subPackage = "cmd/lab-monitoring";
|
||||||
|
mainProgram = "lab-monitoring";
|
||||||
|
description = "MCP server for Prometheus and Alertmanager monitoring";
|
||||||
|
};
|
||||||
default = self.packages.${system}.nixpkgs-search;
|
default = self.packages.${system}.nixpkgs-search;
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -73,6 +80,10 @@
|
|||||||
imports = [ ./nix/hm-options-module.nix ];
|
imports = [ ./nix/hm-options-module.nix ];
|
||||||
services.hm-options-mcp.package = lib.mkDefault self.packages.${pkgs.system}.hm-options;
|
services.hm-options-mcp.package = lib.mkDefault self.packages.${pkgs.system}.hm-options;
|
||||||
};
|
};
|
||||||
|
lab-monitoring-mcp = { pkgs, ... }: {
|
||||||
|
imports = [ ./nix/lab-monitoring-module.nix ];
|
||||||
|
services.lab-monitoring.package = lib.mkDefault self.packages.${pkgs.system}.lab-monitoring;
|
||||||
|
};
|
||||||
default = self.nixosModules.nixpkgs-search-mcp;
|
default = self.nixosModules.nixpkgs-search-mcp;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ const (
|
|||||||
ModeOptions ServerMode = "options"
|
ModeOptions ServerMode = "options"
|
||||||
// ModePackages exposes only package-related tools.
|
// ModePackages exposes only package-related tools.
|
||||||
ModePackages ServerMode = "packages"
|
ModePackages ServerMode = "packages"
|
||||||
|
// ModeCustom exposes externally registered tools (no database required).
|
||||||
|
ModeCustom ServerMode = "custom"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ServerConfig contains configuration for the MCP server.
|
// ServerConfig contains configuration for the MCP server.
|
||||||
@@ -40,7 +42,7 @@ type ServerConfig struct {
|
|||||||
func DefaultNixOSConfig() ServerConfig {
|
func DefaultNixOSConfig() ServerConfig {
|
||||||
return ServerConfig{
|
return ServerConfig{
|
||||||
Name: "nixos-options",
|
Name: "nixos-options",
|
||||||
Version: "0.2.0",
|
Version: "0.2.1",
|
||||||
DefaultChannel: "nixos-stable",
|
DefaultChannel: "nixos-stable",
|
||||||
SourceName: "nixpkgs",
|
SourceName: "nixpkgs",
|
||||||
Mode: ModeOptions,
|
Mode: ModeOptions,
|
||||||
@@ -60,7 +62,7 @@ This ensures option documentation matches the nixpkgs version the project actual
|
|||||||
func DefaultNixpkgsPackagesConfig() ServerConfig {
|
func DefaultNixpkgsPackagesConfig() ServerConfig {
|
||||||
return ServerConfig{
|
return ServerConfig{
|
||||||
Name: "nixpkgs-packages",
|
Name: "nixpkgs-packages",
|
||||||
Version: "0.2.0",
|
Version: "0.2.1",
|
||||||
DefaultChannel: "nixos-stable",
|
DefaultChannel: "nixos-stable",
|
||||||
SourceName: "nixpkgs",
|
SourceName: "nixpkgs",
|
||||||
Mode: ModePackages,
|
Mode: ModePackages,
|
||||||
@@ -74,11 +76,29 @@ This ensures package information matches the nixpkgs version the project actuall
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DefaultMonitoringConfig returns the default configuration for the lab monitoring server.
|
||||||
|
func DefaultMonitoringConfig() ServerConfig {
|
||||||
|
return ServerConfig{
|
||||||
|
Name: "lab-monitoring",
|
||||||
|
Version: "0.1.0",
|
||||||
|
Mode: ModeCustom,
|
||||||
|
Instructions: `Lab Monitoring MCP Server - Query Prometheus metrics and Alertmanager alerts.
|
||||||
|
|
||||||
|
Tools for querying your monitoring stack:
|
||||||
|
- Search and query Prometheus metrics with PromQL
|
||||||
|
- List and inspect alerts from Alertmanager
|
||||||
|
- View scrape target health status
|
||||||
|
- Manage alert silences
|
||||||
|
|
||||||
|
All queries are executed against live Prometheus and Alertmanager HTTP APIs.`,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// DefaultHomeManagerConfig returns the default configuration for Home Manager options server.
|
// DefaultHomeManagerConfig returns the default configuration for Home Manager options server.
|
||||||
func DefaultHomeManagerConfig() ServerConfig {
|
func DefaultHomeManagerConfig() ServerConfig {
|
||||||
return ServerConfig{
|
return ServerConfig{
|
||||||
Name: "hm-options",
|
Name: "hm-options",
|
||||||
Version: "0.2.0",
|
Version: "0.2.1",
|
||||||
DefaultChannel: "hm-stable",
|
DefaultChannel: "hm-stable",
|
||||||
SourceName: "home-manager",
|
SourceName: "home-manager",
|
||||||
Mode: ModeOptions,
|
Mode: ModeOptions,
|
||||||
@@ -99,6 +119,7 @@ type Server struct {
|
|||||||
store database.Store
|
store database.Store
|
||||||
config ServerConfig
|
config ServerConfig
|
||||||
tools map[string]ToolHandler
|
tools map[string]ToolHandler
|
||||||
|
toolDefs []Tool
|
||||||
initialized bool
|
initialized bool
|
||||||
logger *log.Logger
|
logger *log.Logger
|
||||||
}
|
}
|
||||||
@@ -106,7 +127,7 @@ type Server struct {
|
|||||||
// ToolHandler is a function that handles a tool call.
|
// ToolHandler is a function that handles a tool call.
|
||||||
type ToolHandler func(ctx context.Context, args map[string]interface{}) (CallToolResult, error)
|
type ToolHandler func(ctx context.Context, args map[string]interface{}) (CallToolResult, error)
|
||||||
|
|
||||||
// NewServer creates a new MCP server with the given configuration.
|
// NewServer creates a new MCP server with a database store.
|
||||||
func NewServer(store database.Store, logger *log.Logger, config ServerConfig) *Server {
|
func NewServer(store database.Store, logger *log.Logger, config ServerConfig) *Server {
|
||||||
if logger == nil {
|
if logger == nil {
|
||||||
logger = log.New(io.Discard, "", 0)
|
logger = log.New(io.Discard, "", 0)
|
||||||
@@ -121,6 +142,25 @@ func NewServer(store database.Store, logger *log.Logger, config ServerConfig) *S
|
|||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewGenericServer creates a new MCP server without a database store.
|
||||||
|
// Use RegisterTool to add tools externally.
|
||||||
|
func NewGenericServer(logger *log.Logger, config ServerConfig) *Server {
|
||||||
|
if logger == nil {
|
||||||
|
logger = log.New(io.Discard, "", 0)
|
||||||
|
}
|
||||||
|
return &Server{
|
||||||
|
config: config,
|
||||||
|
tools: make(map[string]ToolHandler),
|
||||||
|
logger: logger,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegisterTool registers an externally defined tool with its handler.
|
||||||
|
func (s *Server) RegisterTool(tool Tool, handler ToolHandler) {
|
||||||
|
s.toolDefs = append(s.toolDefs, tool)
|
||||||
|
s.tools[tool.Name] = handler
|
||||||
|
}
|
||||||
|
|
||||||
// registerTools registers all available tools.
|
// registerTools registers all available tools.
|
||||||
func (s *Server) registerTools() {
|
func (s *Server) registerTools() {
|
||||||
// Tools will be implemented in handlers.go
|
// Tools will be implemented in handlers.go
|
||||||
@@ -237,6 +277,11 @@ func (s *Server) handleToolsList(req *Request) *Response {
|
|||||||
|
|
||||||
// getToolDefinitions returns the tool definitions.
|
// getToolDefinitions returns the tool definitions.
|
||||||
func (s *Server) getToolDefinitions() []Tool {
|
func (s *Server) getToolDefinitions() []Tool {
|
||||||
|
// For custom mode, return externally registered tools
|
||||||
|
if s.config.Mode == ModeCustom {
|
||||||
|
return s.toolDefs
|
||||||
|
}
|
||||||
|
|
||||||
// For packages mode, return package tools
|
// For packages mode, return package tools
|
||||||
if s.config.Mode == ModePackages {
|
if s.config.Mode == ModePackages {
|
||||||
return s.getPackageToolDefinitions()
|
return s.getPackageToolDefinitions()
|
||||||
|
|||||||
153
internal/monitoring/alertmanager.go
Normal file
153
internal/monitoring/alertmanager.go
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
package monitoring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AlertmanagerClient is an HTTP client for the Alertmanager API v2.
|
||||||
|
type AlertmanagerClient struct {
|
||||||
|
baseURL string
|
||||||
|
httpClient *http.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewAlertmanagerClient creates a new Alertmanager API client.
|
||||||
|
func NewAlertmanagerClient(baseURL string) *AlertmanagerClient {
|
||||||
|
return &AlertmanagerClient{
|
||||||
|
baseURL: strings.TrimRight(baseURL, "/"),
|
||||||
|
httpClient: &http.Client{
|
||||||
|
Timeout: 30 * time.Second,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListAlerts returns alerts matching the given filters.
|
||||||
|
func (c *AlertmanagerClient) ListAlerts(ctx context.Context, filters AlertFilters) ([]Alert, error) {
|
||||||
|
params := url.Values{}
|
||||||
|
|
||||||
|
if filters.Active != nil {
|
||||||
|
params.Set("active", fmt.Sprintf("%t", *filters.Active))
|
||||||
|
}
|
||||||
|
if filters.Silenced != nil {
|
||||||
|
params.Set("silenced", fmt.Sprintf("%t", *filters.Silenced))
|
||||||
|
}
|
||||||
|
if filters.Inhibited != nil {
|
||||||
|
params.Set("inhibited", fmt.Sprintf("%t", *filters.Inhibited))
|
||||||
|
}
|
||||||
|
if filters.Unprocessed != nil {
|
||||||
|
params.Set("unprocessed", fmt.Sprintf("%t", *filters.Unprocessed))
|
||||||
|
}
|
||||||
|
if filters.Receiver != "" {
|
||||||
|
params.Set("receiver", filters.Receiver)
|
||||||
|
}
|
||||||
|
for _, f := range filters.Filter {
|
||||||
|
params.Add("filter", f)
|
||||||
|
}
|
||||||
|
|
||||||
|
u := c.baseURL + "/api/v2/alerts"
|
||||||
|
if len(params) > 0 {
|
||||||
|
u += "?" + params.Encode()
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := c.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("request failed: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck // cleanup on exit
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var alerts []Alert
|
||||||
|
if err := json.Unmarshal(body, &alerts); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse alerts: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return alerts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListSilences returns all silences.
|
||||||
|
func (c *AlertmanagerClient) ListSilences(ctx context.Context) ([]Silence, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+"/api/v2/silences", nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := c.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("request failed: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck // cleanup on exit
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var silences []Silence
|
||||||
|
if err := json.Unmarshal(body, &silences); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse silences: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return silences, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateSilence creates a new silence and returns the silence ID.
|
||||||
|
func (c *AlertmanagerClient) CreateSilence(ctx context.Context, silence Silence) (string, error) {
|
||||||
|
data, err := json.Marshal(silence)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to marshal silence: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/api/v2/silences", bytes.NewReader(data))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
resp, err := c.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("request failed: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck // cleanup on exit
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to read response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return "", fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var result struct {
|
||||||
|
SilenceID string `json:"silenceID"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(body, &result); err != nil {
|
||||||
|
return "", fmt.Errorf("failed to parse response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.SilenceID, nil
|
||||||
|
}
|
||||||
175
internal/monitoring/alertmanager_test.go
Normal file
175
internal/monitoring/alertmanager_test.go
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
package monitoring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestAlertmanagerClient_ListAlerts(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/v2/alerts" {
|
||||||
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`[
|
||||||
|
{
|
||||||
|
"annotations": {"summary": "Target is down"},
|
||||||
|
"endsAt": "2024-01-01T01:00:00Z",
|
||||||
|
"fingerprint": "abc123",
|
||||||
|
"receivers": [{"name": "default"}],
|
||||||
|
"startsAt": "2024-01-01T00:00:00Z",
|
||||||
|
"status": {"inhibitedBy": [], "silencedBy": [], "state": "active"},
|
||||||
|
"updatedAt": "2024-01-01T00:00:00Z",
|
||||||
|
"generatorURL": "http://prometheus:9090/graph",
|
||||||
|
"labels": {"alertname": "TargetDown", "severity": "critical", "instance": "node1:9100"}
|
||||||
|
}
|
||||||
|
]`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewAlertmanagerClient(srv.URL)
|
||||||
|
alerts, err := client.ListAlerts(context.Background(), AlertFilters{})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(alerts) != 1 {
|
||||||
|
t.Fatalf("expected 1 alert, got %d", len(alerts))
|
||||||
|
}
|
||||||
|
if alerts[0].Fingerprint != "abc123" {
|
||||||
|
t.Errorf("expected fingerprint=abc123, got %s", alerts[0].Fingerprint)
|
||||||
|
}
|
||||||
|
if alerts[0].Labels["alertname"] != "TargetDown" {
|
||||||
|
t.Errorf("expected alertname=TargetDown, got %s", alerts[0].Labels["alertname"])
|
||||||
|
}
|
||||||
|
if alerts[0].Status.State != "active" {
|
||||||
|
t.Errorf("expected state=active, got %s", alerts[0].Status.State)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAlertmanagerClient_ListAlertsWithFilters(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
q := r.URL.Query()
|
||||||
|
if q.Get("active") != "true" {
|
||||||
|
t.Errorf("expected active=true, got %s", q.Get("active"))
|
||||||
|
}
|
||||||
|
if q.Get("silenced") != "false" {
|
||||||
|
t.Errorf("expected silenced=false, got %s", q.Get("silenced"))
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`[]`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewAlertmanagerClient(srv.URL)
|
||||||
|
active := true
|
||||||
|
silenced := false
|
||||||
|
_, err := client.ListAlerts(context.Background(), AlertFilters{
|
||||||
|
Active: &active,
|
||||||
|
Silenced: &silenced,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAlertmanagerClient_ListSilences(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/v2/silences" {
|
||||||
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`[
|
||||||
|
{
|
||||||
|
"id": "silence-1",
|
||||||
|
"matchers": [{"name": "alertname", "value": "TargetDown", "isRegex": false}],
|
||||||
|
"startsAt": "2024-01-01T00:00:00Z",
|
||||||
|
"endsAt": "2024-01-01T02:00:00Z",
|
||||||
|
"createdBy": "admin",
|
||||||
|
"comment": "Maintenance window",
|
||||||
|
"status": {"state": "active"}
|
||||||
|
}
|
||||||
|
]`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewAlertmanagerClient(srv.URL)
|
||||||
|
silences, err := client.ListSilences(context.Background())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(silences) != 1 {
|
||||||
|
t.Fatalf("expected 1 silence, got %d", len(silences))
|
||||||
|
}
|
||||||
|
if silences[0].ID != "silence-1" {
|
||||||
|
t.Errorf("expected id=silence-1, got %s", silences[0].ID)
|
||||||
|
}
|
||||||
|
if silences[0].CreatedBy != "admin" {
|
||||||
|
t.Errorf("expected createdBy=admin, got %s", silences[0].CreatedBy)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAlertmanagerClient_CreateSilence(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != http.MethodPost {
|
||||||
|
t.Errorf("expected POST, got %s", r.Method)
|
||||||
|
}
|
||||||
|
if r.URL.Path != "/api/v2/silences" {
|
||||||
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||||
|
}
|
||||||
|
if r.Header.Get("Content-Type") != "application/json" {
|
||||||
|
t.Errorf("expected Content-Type=application/json, got %s", r.Header.Get("Content-Type"))
|
||||||
|
}
|
||||||
|
|
||||||
|
var silence Silence
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&silence); err != nil {
|
||||||
|
t.Fatalf("failed to decode request body: %v", err)
|
||||||
|
}
|
||||||
|
if silence.CreatedBy != "admin" {
|
||||||
|
t.Errorf("expected createdBy=admin, got %s", silence.CreatedBy)
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{"silenceID": "new-silence-id"}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewAlertmanagerClient(srv.URL)
|
||||||
|
id, err := client.CreateSilence(context.Background(), Silence{
|
||||||
|
Matchers: []Matcher{
|
||||||
|
{Name: "alertname", Value: "TargetDown", IsRegex: false},
|
||||||
|
},
|
||||||
|
StartsAt: time.Now(),
|
||||||
|
EndsAt: time.Now().Add(2 * time.Hour),
|
||||||
|
CreatedBy: "admin",
|
||||||
|
Comment: "Test silence",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if id != "new-silence-id" {
|
||||||
|
t.Errorf("expected id=new-silence-id, got %s", id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAlertmanagerClient_HTTPError(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusInternalServerError)
|
||||||
|
_, _ = w.Write([]byte("internal error"))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewAlertmanagerClient(srv.URL)
|
||||||
|
_, err := client.ListAlerts(context.Background(), AlertFilters{})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
317
internal/monitoring/format.go
Normal file
317
internal/monitoring/format.go
Normal file
@@ -0,0 +1,317 @@
|
|||||||
|
package monitoring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
const maxRows = 100
|
||||||
|
|
||||||
|
// formatInstantVector formats instant vector results as a markdown table.
|
||||||
|
func formatInstantVector(results []PromInstantVector) string {
|
||||||
|
if len(results) == 0 {
|
||||||
|
return "No results."
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect all label keys across results (excluding __name__)
|
||||||
|
labelKeys := collectLabelKeys(results)
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
|
||||||
|
// Header
|
||||||
|
sb.WriteString("| ")
|
||||||
|
if _, ok := results[0].Metric["__name__"]; ok {
|
||||||
|
sb.WriteString("Metric | ")
|
||||||
|
}
|
||||||
|
for _, key := range labelKeys {
|
||||||
|
sb.WriteString(key)
|
||||||
|
sb.WriteString(" | ")
|
||||||
|
}
|
||||||
|
sb.WriteString("Value |\n")
|
||||||
|
|
||||||
|
// Separator
|
||||||
|
sb.WriteString("| ")
|
||||||
|
if _, ok := results[0].Metric["__name__"]; ok {
|
||||||
|
sb.WriteString("--- | ")
|
||||||
|
}
|
||||||
|
for range labelKeys {
|
||||||
|
sb.WriteString("--- | ")
|
||||||
|
}
|
||||||
|
sb.WriteString("--- |\n")
|
||||||
|
|
||||||
|
// Rows
|
||||||
|
truncated := false
|
||||||
|
for i, r := range results {
|
||||||
|
if i >= maxRows {
|
||||||
|
truncated = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
sb.WriteString("| ")
|
||||||
|
if _, ok := results[0].Metric["__name__"]; ok {
|
||||||
|
sb.WriteString(r.Metric["__name__"])
|
||||||
|
sb.WriteString(" | ")
|
||||||
|
}
|
||||||
|
for _, key := range labelKeys {
|
||||||
|
sb.WriteString(r.Metric[key])
|
||||||
|
sb.WriteString(" | ")
|
||||||
|
}
|
||||||
|
// Value is at index 1 of the value tuple
|
||||||
|
if len(r.Value) >= 2 {
|
||||||
|
if v, ok := r.Value[1].(string); ok {
|
||||||
|
sb.WriteString(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sb.WriteString(" |\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if truncated {
|
||||||
|
sb.WriteString(fmt.Sprintf("\n*Showing %d of %d results (truncated)*\n", maxRows, len(results)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectLabelKeys returns sorted label keys across all results, excluding __name__.
|
||||||
|
func collectLabelKeys(results []PromInstantVector) []string {
|
||||||
|
keySet := make(map[string]struct{})
|
||||||
|
for _, r := range results {
|
||||||
|
for k := range r.Metric {
|
||||||
|
if k != "__name__" {
|
||||||
|
keySet[k] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
keys := make([]string, 0, len(keySet))
|
||||||
|
for k := range keySet {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatAlerts formats alerts as grouped markdown.
|
||||||
|
func formatAlerts(alerts []Alert) string {
|
||||||
|
if len(alerts) == 0 {
|
||||||
|
return "No alerts found."
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group by alertname
|
||||||
|
groups := make(map[string][]Alert)
|
||||||
|
var order []string
|
||||||
|
for _, a := range alerts {
|
||||||
|
name := a.Labels["alertname"]
|
||||||
|
if _, exists := groups[name]; !exists {
|
||||||
|
order = append(order, name)
|
||||||
|
}
|
||||||
|
groups[name] = append(groups[name], a)
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString(fmt.Sprintf("**%d alert(s)**\n\n", len(alerts)))
|
||||||
|
|
||||||
|
for _, name := range order {
|
||||||
|
group := groups[name]
|
||||||
|
sb.WriteString(fmt.Sprintf("## %s (%d)\n\n", name, len(group)))
|
||||||
|
|
||||||
|
for i, a := range group {
|
||||||
|
if i >= maxRows {
|
||||||
|
sb.WriteString(fmt.Sprintf("*... and %d more*\n", len(group)-maxRows))
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.WriteString(fmt.Sprintf("**State:** %s | **Severity:** %s\n", a.Status.State, a.Labels["severity"]))
|
||||||
|
|
||||||
|
// Labels (excluding alertname and severity)
|
||||||
|
var labels []string
|
||||||
|
for k, v := range a.Labels {
|
||||||
|
if k != "alertname" && k != "severity" {
|
||||||
|
labels = append(labels, fmt.Sprintf("%s=%s", k, v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Strings(labels)
|
||||||
|
if len(labels) > 0 {
|
||||||
|
sb.WriteString(fmt.Sprintf("**Labels:** %s\n", strings.Join(labels, ", ")))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Annotations
|
||||||
|
for k, v := range a.Annotations {
|
||||||
|
sb.WriteString(fmt.Sprintf("**%s:** %s\n", k, v))
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.WriteString(fmt.Sprintf("**Fingerprint:** %s\n", a.Fingerprint))
|
||||||
|
sb.WriteString(fmt.Sprintf("**Started:** %s\n", a.StartsAt.Format(time.RFC3339)))
|
||||||
|
|
||||||
|
if len(a.Status.SilencedBy) > 0 {
|
||||||
|
sb.WriteString(fmt.Sprintf("**Silenced by:** %s\n", strings.Join(a.Status.SilencedBy, ", ")))
|
||||||
|
}
|
||||||
|
if len(a.Status.InhibitedBy) > 0 {
|
||||||
|
sb.WriteString(fmt.Sprintf("**Inhibited by:** %s\n", strings.Join(a.Status.InhibitedBy, ", ")))
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatTargets formats targets as grouped markdown.
|
||||||
|
func formatTargets(targets *PromTargetsData) string {
|
||||||
|
if targets == nil || len(targets.ActiveTargets) == 0 {
|
||||||
|
return "No active targets."
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group by job
|
||||||
|
groups := make(map[string][]PromTarget)
|
||||||
|
var order []string
|
||||||
|
for _, t := range targets.ActiveTargets {
|
||||||
|
job := t.Labels["job"]
|
||||||
|
if _, exists := groups[job]; !exists {
|
||||||
|
order = append(order, job)
|
||||||
|
}
|
||||||
|
groups[job] = append(groups[job], t)
|
||||||
|
}
|
||||||
|
sort.Strings(order)
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString(fmt.Sprintf("**%d active target(s)**\n\n", len(targets.ActiveTargets)))
|
||||||
|
|
||||||
|
// Count health statuses
|
||||||
|
healthCounts := make(map[string]int)
|
||||||
|
for _, t := range targets.ActiveTargets {
|
||||||
|
healthCounts[t.Health]++
|
||||||
|
}
|
||||||
|
var healthParts []string
|
||||||
|
for h, c := range healthCounts {
|
||||||
|
healthParts = append(healthParts, fmt.Sprintf("%s: %d", h, c))
|
||||||
|
}
|
||||||
|
sort.Strings(healthParts)
|
||||||
|
sb.WriteString(fmt.Sprintf("**Health summary:** %s\n\n", strings.Join(healthParts, ", ")))
|
||||||
|
|
||||||
|
for _, job := range order {
|
||||||
|
group := groups[job]
|
||||||
|
sb.WriteString(fmt.Sprintf("## %s (%d targets)\n\n", job, len(group)))
|
||||||
|
|
||||||
|
sb.WriteString("| Instance | Health | Last Scrape | Duration | Error |\n")
|
||||||
|
sb.WriteString("| --- | --- | --- | --- | --- |\n")
|
||||||
|
|
||||||
|
for _, t := range group {
|
||||||
|
instance := t.Labels["instance"]
|
||||||
|
lastScrape := t.LastScrape.Format("15:04:05")
|
||||||
|
duration := fmt.Sprintf("%.3fs", t.LastScrapeDuration)
|
||||||
|
lastErr := t.LastError
|
||||||
|
if lastErr == "" {
|
||||||
|
lastErr = "-"
|
||||||
|
}
|
||||||
|
sb.WriteString(fmt.Sprintf("| %s | %s | %s | %s | %s |\n",
|
||||||
|
instance, t.Health, lastScrape, duration, lastErr))
|
||||||
|
}
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatSilences formats silences as markdown.
|
||||||
|
func formatSilences(silences []Silence) string {
|
||||||
|
if len(silences) == 0 {
|
||||||
|
return "No silences found."
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString(fmt.Sprintf("**%d silence(s)**\n\n", len(silences)))
|
||||||
|
|
||||||
|
for _, s := range silences {
|
||||||
|
state := "unknown"
|
||||||
|
if s.Status != nil {
|
||||||
|
state = s.Status.State
|
||||||
|
}
|
||||||
|
sb.WriteString(fmt.Sprintf("## Silence %s [%s]\n\n", s.ID, state))
|
||||||
|
|
||||||
|
// Matchers
|
||||||
|
var matchers []string
|
||||||
|
for _, m := range s.Matchers {
|
||||||
|
op := "="
|
||||||
|
if m.IsRegex {
|
||||||
|
op = "=~"
|
||||||
|
}
|
||||||
|
if m.IsEqual != nil && !*m.IsEqual {
|
||||||
|
if m.IsRegex {
|
||||||
|
op = "!~"
|
||||||
|
} else {
|
||||||
|
op = "!="
|
||||||
|
}
|
||||||
|
}
|
||||||
|
matchers = append(matchers, fmt.Sprintf("%s%s%s", m.Name, op, m.Value))
|
||||||
|
}
|
||||||
|
sb.WriteString(fmt.Sprintf("**Matchers:** %s\n", strings.Join(matchers, ", ")))
|
||||||
|
sb.WriteString(fmt.Sprintf("**Created by:** %s\n", s.CreatedBy))
|
||||||
|
sb.WriteString(fmt.Sprintf("**Comment:** %s\n", s.Comment))
|
||||||
|
sb.WriteString(fmt.Sprintf("**Starts:** %s\n", s.StartsAt.Format(time.RFC3339)))
|
||||||
|
sb.WriteString(fmt.Sprintf("**Ends:** %s\n", s.EndsAt.Format(time.RFC3339)))
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatMetricSearch formats metric search results.
|
||||||
|
func formatMetricSearch(names []string, metadata map[string][]PromMetadata) string {
|
||||||
|
if len(names) == 0 {
|
||||||
|
return "No metrics found matching the search."
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString(fmt.Sprintf("**%d metric(s) found**\n\n", len(names)))
|
||||||
|
|
||||||
|
sb.WriteString("| Metric | Type | Help |\n")
|
||||||
|
sb.WriteString("| --- | --- | --- |\n")
|
||||||
|
|
||||||
|
truncated := false
|
||||||
|
for i, name := range names {
|
||||||
|
if i >= maxRows {
|
||||||
|
truncated = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
metaType := ""
|
||||||
|
help := ""
|
||||||
|
if metas, ok := metadata[name]; ok && len(metas) > 0 {
|
||||||
|
metaType = metas[0].Type
|
||||||
|
help = metas[0].Help
|
||||||
|
if len(help) > 100 {
|
||||||
|
help = help[:100] + "..."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sb.WriteString(fmt.Sprintf("| %s | %s | %s |\n", name, metaType, help))
|
||||||
|
}
|
||||||
|
|
||||||
|
if truncated {
|
||||||
|
sb.WriteString(fmt.Sprintf("\n*Showing %d of %d metrics (truncated)*\n", maxRows, len(names)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatMetricMetadata formats metadata for a single metric.
|
||||||
|
func formatMetricMetadata(name string, metas []PromMetadata) string {
|
||||||
|
if len(metas) == 0 {
|
||||||
|
return fmt.Sprintf("No metadata found for metric '%s'.", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString(fmt.Sprintf("# %s\n\n", name))
|
||||||
|
|
||||||
|
for _, m := range metas {
|
||||||
|
sb.WriteString(fmt.Sprintf("**Type:** %s\n", m.Type))
|
||||||
|
if m.Help != "" {
|
||||||
|
sb.WriteString(fmt.Sprintf("**Help:** %s\n", m.Help))
|
||||||
|
}
|
||||||
|
if m.Unit != "" {
|
||||||
|
sb.WriteString(fmt.Sprintf("**Unit:** %s\n", m.Unit))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
434
internal/monitoring/handlers.go
Normal file
434
internal/monitoring/handlers.go
Normal file
@@ -0,0 +1,434 @@
|
|||||||
|
package monitoring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"git.t-juice.club/torjus/labmcp/internal/mcp"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RegisterHandlers registers all monitoring tool handlers on the MCP server.
|
||||||
|
func RegisterHandlers(server *mcp.Server, prom *PrometheusClient, am *AlertmanagerClient) {
|
||||||
|
server.RegisterTool(listAlertsTool(), makeListAlertsHandler(am))
|
||||||
|
server.RegisterTool(getAlertTool(), makeGetAlertHandler(am))
|
||||||
|
server.RegisterTool(searchMetricsTool(), makeSearchMetricsHandler(prom))
|
||||||
|
server.RegisterTool(getMetricMetadataTool(), makeGetMetricMetadataHandler(prom))
|
||||||
|
server.RegisterTool(queryTool(), makeQueryHandler(prom))
|
||||||
|
server.RegisterTool(listTargetsTool(), makeListTargetsHandler(prom))
|
||||||
|
server.RegisterTool(listSilencesTool(), makeListSilencesHandler(am))
|
||||||
|
server.RegisterTool(createSilenceTool(), makeCreateSilenceHandler(am))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tool definitions
|
||||||
|
|
||||||
|
func listAlertsTool() mcp.Tool {
|
||||||
|
return mcp.Tool{
|
||||||
|
Name: "list_alerts",
|
||||||
|
Description: "List alerts from Alertmanager with optional filters",
|
||||||
|
InputSchema: mcp.InputSchema{
|
||||||
|
Type: "object",
|
||||||
|
Properties: map[string]mcp.Property{
|
||||||
|
"state": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "Filter by alert state: 'active', 'suppressed', or 'unprocessed'",
|
||||||
|
Enum: []string{"active", "suppressed", "unprocessed"},
|
||||||
|
},
|
||||||
|
"severity": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "Filter by severity label (e.g., 'critical', 'warning')",
|
||||||
|
},
|
||||||
|
"receiver": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "Filter by receiver name",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getAlertTool() mcp.Tool {
|
||||||
|
return mcp.Tool{
|
||||||
|
Name: "get_alert",
|
||||||
|
Description: "Get full details for a specific alert by fingerprint",
|
||||||
|
InputSchema: mcp.InputSchema{
|
||||||
|
Type: "object",
|
||||||
|
Properties: map[string]mcp.Property{
|
||||||
|
"fingerprint": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "Alert fingerprint identifier",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Required: []string{"fingerprint"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func searchMetricsTool() mcp.Tool {
|
||||||
|
return mcp.Tool{
|
||||||
|
Name: "search_metrics",
|
||||||
|
Description: "Search Prometheus metric names with optional substring filter, enriched with metadata (type, help text)",
|
||||||
|
InputSchema: mcp.InputSchema{
|
||||||
|
Type: "object",
|
||||||
|
Properties: map[string]mcp.Property{
|
||||||
|
"query": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "Substring to filter metric names (e.g., 'cpu', 'memory', 'node_'). Empty returns all metrics.",
|
||||||
|
},
|
||||||
|
"limit": {
|
||||||
|
Type: "integer",
|
||||||
|
Description: "Maximum number of results (default: 50)",
|
||||||
|
Default: 50,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getMetricMetadataTool() mcp.Tool {
|
||||||
|
return mcp.Tool{
|
||||||
|
Name: "get_metric_metadata",
|
||||||
|
Description: "Get type, help text, and unit for a specific Prometheus metric",
|
||||||
|
InputSchema: mcp.InputSchema{
|
||||||
|
Type: "object",
|
||||||
|
Properties: map[string]mcp.Property{
|
||||||
|
"metric": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "Metric name (e.g., 'node_cpu_seconds_total')",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Required: []string{"metric"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func queryTool() mcp.Tool {
|
||||||
|
return mcp.Tool{
|
||||||
|
Name: "query",
|
||||||
|
Description: "Execute an instant PromQL query against Prometheus. Supports aggregations like avg_over_time(metric[1h]), rate(), sum(), etc.",
|
||||||
|
InputSchema: mcp.InputSchema{
|
||||||
|
Type: "object",
|
||||||
|
Properties: map[string]mcp.Property{
|
||||||
|
"promql": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "PromQL expression to evaluate (e.g., 'up', 'rate(http_requests_total[5m])', 'avg_over_time(node_load1[1h])')",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Required: []string{"promql"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func listTargetsTool() mcp.Tool {
|
||||||
|
return mcp.Tool{
|
||||||
|
Name: "list_targets",
|
||||||
|
Description: "List Prometheus scrape targets with health status, grouped by job",
|
||||||
|
InputSchema: mcp.InputSchema{
|
||||||
|
Type: "object",
|
||||||
|
Properties: map[string]mcp.Property{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func listSilencesTool() mcp.Tool {
|
||||||
|
return mcp.Tool{
|
||||||
|
Name: "list_silences",
|
||||||
|
Description: "List active and pending alert silences from Alertmanager",
|
||||||
|
InputSchema: mcp.InputSchema{
|
||||||
|
Type: "object",
|
||||||
|
Properties: map[string]mcp.Property{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func createSilenceTool() mcp.Tool {
|
||||||
|
return mcp.Tool{
|
||||||
|
Name: "create_silence",
|
||||||
|
Description: `Create a new silence in Alertmanager. IMPORTANT: Always confirm with the user before creating a silence, showing them the matchers, duration, and reason.`,
|
||||||
|
InputSchema: mcp.InputSchema{
|
||||||
|
Type: "object",
|
||||||
|
Properties: map[string]mcp.Property{
|
||||||
|
"matchers": {
|
||||||
|
Type: "string",
|
||||||
|
Description: `JSON array of matchers, e.g. [{"name":"alertname","value":"TargetDown","isRegex":false}]`,
|
||||||
|
},
|
||||||
|
"duration": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "Silence duration in Go duration format (e.g., '2h', '30m', '1h30m')",
|
||||||
|
},
|
||||||
|
"author": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "Author of the silence",
|
||||||
|
},
|
||||||
|
"comment": {
|
||||||
|
Type: "string",
|
||||||
|
Description: "Reason for the silence",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Required: []string{"matchers", "duration", "author", "comment"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handler constructors
|
||||||
|
|
||||||
|
func makeListAlertsHandler(am *AlertmanagerClient) mcp.ToolHandler {
|
||||||
|
return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
|
||||||
|
filters := AlertFilters{}
|
||||||
|
|
||||||
|
if state, ok := args["state"].(string); ok && state != "" {
|
||||||
|
switch state {
|
||||||
|
case "active":
|
||||||
|
active := true
|
||||||
|
filters.Active = &active
|
||||||
|
silenced := false
|
||||||
|
filters.Silenced = &silenced
|
||||||
|
inhibited := false
|
||||||
|
filters.Inhibited = &inhibited
|
||||||
|
case "suppressed":
|
||||||
|
active := false
|
||||||
|
filters.Active = &active
|
||||||
|
case "unprocessed":
|
||||||
|
unprocessed := true
|
||||||
|
filters.Unprocessed = &unprocessed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if severity, ok := args["severity"].(string); ok && severity != "" {
|
||||||
|
filters.Filter = append(filters.Filter, fmt.Sprintf(`severity="%s"`, severity))
|
||||||
|
}
|
||||||
|
|
||||||
|
if receiver, ok := args["receiver"].(string); ok && receiver != "" {
|
||||||
|
filters.Receiver = receiver
|
||||||
|
}
|
||||||
|
|
||||||
|
alerts, err := am.ListAlerts(ctx, filters)
|
||||||
|
if err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("failed to list alerts: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return mcp.CallToolResult{
|
||||||
|
Content: []mcp.Content{mcp.TextContent(formatAlerts(alerts))},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeGetAlertHandler(am *AlertmanagerClient) mcp.ToolHandler {
|
||||||
|
return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
|
||||||
|
fingerprint, _ := args["fingerprint"].(string)
|
||||||
|
if fingerprint == "" {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("fingerprint is required")), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch all alerts and find the one matching the fingerprint
|
||||||
|
alerts, err := am.ListAlerts(ctx, AlertFilters{})
|
||||||
|
if err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("failed to fetch alerts: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, a := range alerts {
|
||||||
|
if a.Fingerprint == fingerprint {
|
||||||
|
return mcp.CallToolResult{
|
||||||
|
Content: []mcp.Content{mcp.TextContent(formatAlerts([]Alert{a}))},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("alert with fingerprint '%s' not found", fingerprint)), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeSearchMetricsHandler(prom *PrometheusClient) mcp.ToolHandler {
|
||||||
|
return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
|
||||||
|
query, _ := args["query"].(string)
|
||||||
|
limit := 50
|
||||||
|
if l, ok := args["limit"].(float64); ok && l > 0 {
|
||||||
|
limit = int(l)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get all metric names
|
||||||
|
allNames, err := prom.LabelValues(ctx, "__name__")
|
||||||
|
if err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("failed to fetch metric names: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter by substring
|
||||||
|
var matched []string
|
||||||
|
queryLower := strings.ToLower(query)
|
||||||
|
for _, name := range allNames {
|
||||||
|
if query == "" || strings.Contains(strings.ToLower(name), queryLower) {
|
||||||
|
matched = append(matched, name)
|
||||||
|
if len(matched) >= limit {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch metadata for matched metrics
|
||||||
|
metadata, err := prom.Metadata(ctx, "")
|
||||||
|
if err != nil {
|
||||||
|
// Non-fatal: proceed without metadata
|
||||||
|
metadata = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return mcp.CallToolResult{
|
||||||
|
Content: []mcp.Content{mcp.TextContent(formatMetricSearch(matched, metadata))},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeGetMetricMetadataHandler(prom *PrometheusClient) mcp.ToolHandler {
|
||||||
|
return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
|
||||||
|
metric, _ := args["metric"].(string)
|
||||||
|
if metric == "" {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("metric is required")), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata, err := prom.Metadata(ctx, metric)
|
||||||
|
if err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("failed to fetch metadata: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
metas := metadata[metric]
|
||||||
|
return mcp.CallToolResult{
|
||||||
|
Content: []mcp.Content{mcp.TextContent(formatMetricMetadata(metric, metas))},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeQueryHandler(prom *PrometheusClient) mcp.ToolHandler {
|
||||||
|
return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
|
||||||
|
promql, _ := args["promql"].(string)
|
||||||
|
if promql == "" {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("promql is required")), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := prom.Query(ctx, promql, time.Time{})
|
||||||
|
if err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("query failed: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var result string
|
||||||
|
switch data.ResultType {
|
||||||
|
case "vector":
|
||||||
|
result = formatInstantVector(data.Result)
|
||||||
|
case "scalar":
|
||||||
|
if len(data.Result) > 0 && len(data.Result[0].Value) >= 2 {
|
||||||
|
if v, ok := data.Result[0].Value[1].(string); ok {
|
||||||
|
result = fmt.Sprintf("**Scalar result:** %s", v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if result == "" {
|
||||||
|
result = "Scalar query returned no value."
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
result = fmt.Sprintf("Result type: %s\n\n%s", data.ResultType, formatInstantVector(data.Result))
|
||||||
|
}
|
||||||
|
|
||||||
|
return mcp.CallToolResult{
|
||||||
|
Content: []mcp.Content{mcp.TextContent(result)},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeListTargetsHandler(prom *PrometheusClient) mcp.ToolHandler {
|
||||||
|
return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
|
||||||
|
data, err := prom.Targets(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("failed to fetch targets: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return mcp.CallToolResult{
|
||||||
|
Content: []mcp.Content{mcp.TextContent(formatTargets(data))},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeListSilencesHandler(am *AlertmanagerClient) mcp.ToolHandler {
|
||||||
|
return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
|
||||||
|
silences, err := am.ListSilences(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("failed to fetch silences: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter to active/pending only
|
||||||
|
var filtered []Silence
|
||||||
|
for _, s := range silences {
|
||||||
|
if s.Status != nil && (s.Status.State == "active" || s.Status.State == "pending") {
|
||||||
|
filtered = append(filtered, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mcp.CallToolResult{
|
||||||
|
Content: []mcp.Content{mcp.TextContent(formatSilences(filtered))},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeCreateSilenceHandler(am *AlertmanagerClient) mcp.ToolHandler {
|
||||||
|
return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
|
||||||
|
matchersJSON, _ := args["matchers"].(string)
|
||||||
|
if matchersJSON == "" {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("matchers is required")), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
durationStr, _ := args["duration"].(string)
|
||||||
|
if durationStr == "" {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("duration is required")), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
author, _ := args["author"].(string)
|
||||||
|
if author == "" {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("author is required")), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
comment, _ := args["comment"].(string)
|
||||||
|
if comment == "" {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("comment is required")), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse matchers
|
||||||
|
var matchers []Matcher
|
||||||
|
if err := parseJSON(matchersJSON, &matchers); err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("invalid matchers JSON: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse duration
|
||||||
|
duration, err := time.ParseDuration(durationStr)
|
||||||
|
if err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("invalid duration: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
silence := Silence{
|
||||||
|
Matchers: matchers,
|
||||||
|
StartsAt: now,
|
||||||
|
EndsAt: now.Add(duration),
|
||||||
|
CreatedBy: author,
|
||||||
|
Comment: comment,
|
||||||
|
}
|
||||||
|
|
||||||
|
id, err := am.CreateSilence(ctx, silence)
|
||||||
|
if err != nil {
|
||||||
|
return mcp.ErrorContent(fmt.Errorf("failed to create silence: %w", err)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString("Silence created successfully.\n\n")
|
||||||
|
sb.WriteString(fmt.Sprintf("**ID:** %s\n", id))
|
||||||
|
sb.WriteString(fmt.Sprintf("**Expires:** %s\n", silence.EndsAt.Format(time.RFC3339)))
|
||||||
|
sb.WriteString(fmt.Sprintf("**Author:** %s\n", author))
|
||||||
|
sb.WriteString(fmt.Sprintf("**Comment:** %s\n", comment))
|
||||||
|
|
||||||
|
return mcp.CallToolResult{
|
||||||
|
Content: []mcp.Content{mcp.TextContent(sb.String())},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseJSON is a helper to unmarshal JSON from a string.
|
||||||
|
func parseJSON(s string, v interface{}) error {
|
||||||
|
return json.Unmarshal([]byte(s), v)
|
||||||
|
}
|
||||||
378
internal/monitoring/handlers_test.go
Normal file
378
internal/monitoring/handlers_test.go
Normal file
@@ -0,0 +1,378 @@
|
|||||||
|
package monitoring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.t-juice.club/torjus/labmcp/internal/mcp"
|
||||||
|
)
|
||||||
|
|
||||||
|
// setupTestServer creates a test MCP server with monitoring handlers backed by test HTTP servers.
|
||||||
|
func setupTestServer(t *testing.T, promHandler, amHandler http.HandlerFunc) (*mcp.Server, func()) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
promSrv := httptest.NewServer(promHandler)
|
||||||
|
amSrv := httptest.NewServer(amHandler)
|
||||||
|
|
||||||
|
logger := log.New(io.Discard, "", 0)
|
||||||
|
config := mcp.DefaultMonitoringConfig()
|
||||||
|
server := mcp.NewGenericServer(logger, config)
|
||||||
|
|
||||||
|
prom := NewPrometheusClient(promSrv.URL)
|
||||||
|
am := NewAlertmanagerClient(amSrv.URL)
|
||||||
|
RegisterHandlers(server, prom, am)
|
||||||
|
|
||||||
|
cleanup := func() {
|
||||||
|
promSrv.Close()
|
||||||
|
amSrv.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
return server, cleanup
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandler_ListAlerts(t *testing.T) {
|
||||||
|
server, cleanup := setupTestServer(t,
|
||||||
|
nil,
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`[
|
||||||
|
{
|
||||||
|
"annotations": {"summary": "Node is down"},
|
||||||
|
"endsAt": "2024-01-01T01:00:00Z",
|
||||||
|
"fingerprint": "fp1",
|
||||||
|
"receivers": [{"name": "default"}],
|
||||||
|
"startsAt": "2024-01-01T00:00:00Z",
|
||||||
|
"status": {"inhibitedBy": [], "silencedBy": [], "state": "active"},
|
||||||
|
"updatedAt": "2024-01-01T00:00:00Z",
|
||||||
|
"generatorURL": "",
|
||||||
|
"labels": {"alertname": "NodeDown", "severity": "critical"}
|
||||||
|
}
|
||||||
|
]`))
|
||||||
|
},
|
||||||
|
)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
result := callTool(t, server, "list_alerts", map[string]interface{}{})
|
||||||
|
if result.IsError {
|
||||||
|
t.Fatalf("unexpected error: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
if !strings.Contains(result.Content[0].Text, "NodeDown") {
|
||||||
|
t.Errorf("expected output to contain 'NodeDown', got: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
if !strings.Contains(result.Content[0].Text, "1 alert") {
|
||||||
|
t.Errorf("expected output to contain '1 alert', got: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandler_GetAlert(t *testing.T) {
|
||||||
|
server, cleanup := setupTestServer(t,
|
||||||
|
nil,
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`[
|
||||||
|
{
|
||||||
|
"annotations": {"summary": "Found it"},
|
||||||
|
"endsAt": "2024-01-01T01:00:00Z",
|
||||||
|
"fingerprint": "target-fp",
|
||||||
|
"receivers": [{"name": "default"}],
|
||||||
|
"startsAt": "2024-01-01T00:00:00Z",
|
||||||
|
"status": {"inhibitedBy": [], "silencedBy": [], "state": "active"},
|
||||||
|
"updatedAt": "2024-01-01T00:00:00Z",
|
||||||
|
"generatorURL": "",
|
||||||
|
"labels": {"alertname": "TestAlert", "severity": "warning"}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"annotations": {},
|
||||||
|
"endsAt": "2024-01-01T01:00:00Z",
|
||||||
|
"fingerprint": "other-fp",
|
||||||
|
"receivers": [{"name": "default"}],
|
||||||
|
"startsAt": "2024-01-01T00:00:00Z",
|
||||||
|
"status": {"inhibitedBy": [], "silencedBy": [], "state": "active"},
|
||||||
|
"updatedAt": "2024-01-01T00:00:00Z",
|
||||||
|
"generatorURL": "",
|
||||||
|
"labels": {"alertname": "OtherAlert", "severity": "info"}
|
||||||
|
}
|
||||||
|
]`))
|
||||||
|
},
|
||||||
|
)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
result := callTool(t, server, "get_alert", map[string]interface{}{
|
||||||
|
"fingerprint": "target-fp",
|
||||||
|
})
|
||||||
|
if result.IsError {
|
||||||
|
t.Fatalf("unexpected error: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
if !strings.Contains(result.Content[0].Text, "TestAlert") {
|
||||||
|
t.Errorf("expected output to contain 'TestAlert', got: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandler_GetAlertNotFound(t *testing.T) {
|
||||||
|
server, cleanup := setupTestServer(t,
|
||||||
|
nil,
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`[]`))
|
||||||
|
},
|
||||||
|
)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
result := callTool(t, server, "get_alert", map[string]interface{}{
|
||||||
|
"fingerprint": "nonexistent",
|
||||||
|
})
|
||||||
|
if !result.IsError {
|
||||||
|
t.Error("expected error result for nonexistent fingerprint")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandler_Query(t *testing.T) {
|
||||||
|
server, cleanup := setupTestServer(t,
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/v1/query" {
|
||||||
|
http.NotFound(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"status": "success",
|
||||||
|
"data": {
|
||||||
|
"resultType": "vector",
|
||||||
|
"result": [
|
||||||
|
{
|
||||||
|
"metric": {"__name__": "up", "job": "node"},
|
||||||
|
"value": [1234567890, "1"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}`))
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
result := callTool(t, server, "query", map[string]interface{}{
|
||||||
|
"promql": "up",
|
||||||
|
})
|
||||||
|
if result.IsError {
|
||||||
|
t.Fatalf("unexpected error: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
if !strings.Contains(result.Content[0].Text, "node") {
|
||||||
|
t.Errorf("expected output to contain 'node', got: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandler_ListTargets(t *testing.T) {
|
||||||
|
server, cleanup := setupTestServer(t,
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/v1/targets" {
|
||||||
|
http.NotFound(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"status": "success",
|
||||||
|
"data": {
|
||||||
|
"activeTargets": [
|
||||||
|
{
|
||||||
|
"labels": {"instance": "localhost:9090", "job": "prometheus"},
|
||||||
|
"scrapePool": "prometheus",
|
||||||
|
"scrapeUrl": "http://localhost:9090/metrics",
|
||||||
|
"globalUrl": "http://localhost:9090/metrics",
|
||||||
|
"lastError": "",
|
||||||
|
"lastScrape": "2024-01-01T00:00:00Z",
|
||||||
|
"lastScrapeDuration": 0.015,
|
||||||
|
"health": "up",
|
||||||
|
"scrapeInterval": "15s",
|
||||||
|
"scrapeTimeout": "10s"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"droppedTargets": []
|
||||||
|
}
|
||||||
|
}`))
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
result := callTool(t, server, "list_targets", map[string]interface{}{})
|
||||||
|
if result.IsError {
|
||||||
|
t.Fatalf("unexpected error: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
if !strings.Contains(result.Content[0].Text, "prometheus") {
|
||||||
|
t.Errorf("expected output to contain 'prometheus', got: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandler_SearchMetrics(t *testing.T) {
|
||||||
|
server, cleanup := setupTestServer(t,
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
switch r.URL.Path {
|
||||||
|
case "/api/v1/label/__name__/values":
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"status": "success",
|
||||||
|
"data": ["node_cpu_seconds_total", "node_memory_MemTotal_bytes", "up"]
|
||||||
|
}`))
|
||||||
|
case "/api/v1/metadata":
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"status": "success",
|
||||||
|
"data": {
|
||||||
|
"node_cpu_seconds_total": [{"type": "counter", "help": "CPU time", "unit": ""}],
|
||||||
|
"node_memory_MemTotal_bytes": [{"type": "gauge", "help": "Total memory", "unit": "bytes"}]
|
||||||
|
}
|
||||||
|
}`))
|
||||||
|
default:
|
||||||
|
http.NotFound(w, r)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
result := callTool(t, server, "search_metrics", map[string]interface{}{
|
||||||
|
"query": "node",
|
||||||
|
})
|
||||||
|
if result.IsError {
|
||||||
|
t.Fatalf("unexpected error: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
if !strings.Contains(result.Content[0].Text, "node_cpu") {
|
||||||
|
t.Errorf("expected output to contain 'node_cpu', got: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
// "up" should be filtered out since it doesn't match "node"
|
||||||
|
if strings.Contains(result.Content[0].Text, "| up |") {
|
||||||
|
t.Errorf("expected 'up' to be filtered out, got: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandler_ListSilences(t *testing.T) {
|
||||||
|
server, cleanup := setupTestServer(t,
|
||||||
|
nil,
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/v2/silences" {
|
||||||
|
http.NotFound(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`[
|
||||||
|
{
|
||||||
|
"id": "s1",
|
||||||
|
"matchers": [{"name": "alertname", "value": "Test", "isRegex": false}],
|
||||||
|
"startsAt": "2024-01-01T00:00:00Z",
|
||||||
|
"endsAt": "2024-01-01T02:00:00Z",
|
||||||
|
"createdBy": "admin",
|
||||||
|
"comment": "Testing",
|
||||||
|
"status": {"state": "active"}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "s2",
|
||||||
|
"matchers": [{"name": "job", "value": "node", "isRegex": false}],
|
||||||
|
"startsAt": "2023-01-01T00:00:00Z",
|
||||||
|
"endsAt": "2023-01-01T02:00:00Z",
|
||||||
|
"createdBy": "admin",
|
||||||
|
"comment": "Old",
|
||||||
|
"status": {"state": "expired"}
|
||||||
|
}
|
||||||
|
]`))
|
||||||
|
},
|
||||||
|
)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
result := callTool(t, server, "list_silences", map[string]interface{}{})
|
||||||
|
if result.IsError {
|
||||||
|
t.Fatalf("unexpected error: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
// Should show active silence but filter out expired
|
||||||
|
if !strings.Contains(result.Content[0].Text, "s1") {
|
||||||
|
t.Errorf("expected active silence s1 in output, got: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
if strings.Contains(result.Content[0].Text, "s2") {
|
||||||
|
t.Errorf("expected expired silence s2 to be filtered out, got: %s", result.Content[0].Text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandler_ToolCount(t *testing.T) {
|
||||||
|
server, cleanup := setupTestServer(t,
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {},
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {},
|
||||||
|
)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
// Send a tools/list request
|
||||||
|
req := &mcp.Request{
|
||||||
|
JSONRPC: "2.0",
|
||||||
|
ID: 1,
|
||||||
|
Method: "tools/list",
|
||||||
|
}
|
||||||
|
resp := server.HandleRequest(context.Background(), req)
|
||||||
|
if resp == nil {
|
||||||
|
t.Fatal("expected response, got nil")
|
||||||
|
}
|
||||||
|
if resp.Error != nil {
|
||||||
|
t.Fatalf("unexpected error: %s", resp.Error.Message)
|
||||||
|
}
|
||||||
|
|
||||||
|
resultJSON, err := json.Marshal(resp.Result)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to marshal result: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var listResult mcp.ListToolsResult
|
||||||
|
if err := json.Unmarshal(resultJSON, &listResult); err != nil {
|
||||||
|
t.Fatalf("failed to unmarshal result: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(listResult.Tools) != 8 {
|
||||||
|
t.Errorf("expected 8 tools, got %d", len(listResult.Tools))
|
||||||
|
for _, tool := range listResult.Tools {
|
||||||
|
t.Logf(" tool: %s", tool.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// callTool is a test helper that calls a tool through the MCP server.
|
||||||
|
func callTool(t *testing.T, server *mcp.Server, name string, args map[string]interface{}) mcp.CallToolResult {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
params := mcp.CallToolParams{
|
||||||
|
Name: name,
|
||||||
|
Arguments: args,
|
||||||
|
}
|
||||||
|
paramsJSON, err := json.Marshal(params)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to marshal params: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req := &mcp.Request{
|
||||||
|
JSONRPC: "2.0",
|
||||||
|
ID: 1,
|
||||||
|
Method: "tools/call",
|
||||||
|
Params: paramsJSON,
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := server.HandleRequest(context.Background(), req)
|
||||||
|
if resp == nil {
|
||||||
|
t.Fatal("expected response, got nil")
|
||||||
|
}
|
||||||
|
if resp.Error != nil {
|
||||||
|
t.Fatalf("JSON-RPC error: %s", resp.Error.Message)
|
||||||
|
}
|
||||||
|
|
||||||
|
resultJSON, err := json.Marshal(resp.Result)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to marshal result: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var result mcp.CallToolResult
|
||||||
|
if err := json.Unmarshal(resultJSON, &result); err != nil {
|
||||||
|
t.Fatalf("failed to unmarshal result: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
135
internal/monitoring/prometheus.go
Normal file
135
internal/monitoring/prometheus.go
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
package monitoring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PrometheusClient is an HTTP client for the Prometheus API.
|
||||||
|
type PrometheusClient struct {
|
||||||
|
baseURL string
|
||||||
|
httpClient *http.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewPrometheusClient creates a new Prometheus API client.
|
||||||
|
func NewPrometheusClient(baseURL string) *PrometheusClient {
|
||||||
|
return &PrometheusClient{
|
||||||
|
baseURL: strings.TrimRight(baseURL, "/"),
|
||||||
|
httpClient: &http.Client{
|
||||||
|
Timeout: 30 * time.Second,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query executes an instant PromQL query. If ts is zero, the current time is used.
|
||||||
|
func (c *PrometheusClient) Query(ctx context.Context, promql string, ts time.Time) (*PromQueryData, error) {
|
||||||
|
params := url.Values{}
|
||||||
|
params.Set("query", promql)
|
||||||
|
if !ts.IsZero() {
|
||||||
|
params.Set("time", fmt.Sprintf("%d", ts.Unix()))
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := c.get(ctx, "/api/v1/query", params)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var data PromQueryData
|
||||||
|
if err := json.Unmarshal(body, &data); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse query data: %w", err)
|
||||||
|
}
|
||||||
|
return &data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// LabelValues returns all values for a given label name.
|
||||||
|
func (c *PrometheusClient) LabelValues(ctx context.Context, label string) ([]string, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/label/%s/values", url.PathEscape(label))
|
||||||
|
body, err := c.get(ctx, path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("label values failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var values []string
|
||||||
|
if err := json.Unmarshal(body, &values); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse label values: %w", err)
|
||||||
|
}
|
||||||
|
return values, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Metadata returns metadata for metrics. If metric is empty, returns metadata for all metrics.
|
||||||
|
func (c *PrometheusClient) Metadata(ctx context.Context, metric string) (map[string][]PromMetadata, error) {
|
||||||
|
params := url.Values{}
|
||||||
|
if metric != "" {
|
||||||
|
params.Set("metric", metric)
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := c.get(ctx, "/api/v1/metadata", params)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("metadata failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var metadata map[string][]PromMetadata
|
||||||
|
if err := json.Unmarshal(body, &metadata); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse metadata: %w", err)
|
||||||
|
}
|
||||||
|
return metadata, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Targets returns the current scrape targets.
|
||||||
|
func (c *PrometheusClient) Targets(ctx context.Context) (*PromTargetsData, error) {
|
||||||
|
body, err := c.get(ctx, "/api/v1/targets", nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("targets failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var data PromTargetsData
|
||||||
|
if err := json.Unmarshal(body, &data); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse targets data: %w", err)
|
||||||
|
}
|
||||||
|
return &data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// get performs a GET request and returns the "data" field from the Prometheus response envelope.
|
||||||
|
func (c *PrometheusClient) get(ctx context.Context, path string, params url.Values) (json.RawMessage, error) {
|
||||||
|
u := c.baseURL + path
|
||||||
|
if len(params) > 0 {
|
||||||
|
u += "?" + params.Encode()
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := c.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("request failed: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck // cleanup on exit
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var promResp PromResponse
|
||||||
|
if err := json.Unmarshal(body, &promResp); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if promResp.Status != "success" {
|
||||||
|
return nil, fmt.Errorf("prometheus error (%s): %s", promResp.ErrorType, promResp.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
return promResp.Data, nil
|
||||||
|
}
|
||||||
209
internal/monitoring/prometheus_test.go
Normal file
209
internal/monitoring/prometheus_test.go
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
package monitoring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPrometheusClient_Query(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/v1/query" {
|
||||||
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||||
|
}
|
||||||
|
if r.URL.Query().Get("query") != "up" {
|
||||||
|
t.Errorf("unexpected query param: %s", r.URL.Query().Get("query"))
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"status": "success",
|
||||||
|
"data": {
|
||||||
|
"resultType": "vector",
|
||||||
|
"result": [
|
||||||
|
{
|
||||||
|
"metric": {"__name__": "up", "job": "prometheus", "instance": "localhost:9090"},
|
||||||
|
"value": [1234567890, "1"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"metric": {"__name__": "up", "job": "node", "instance": "localhost:9100"},
|
||||||
|
"value": [1234567890, "0"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewPrometheusClient(srv.URL)
|
||||||
|
data, err := client.Query(context.Background(), "up", time.Time{})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if data.ResultType != "vector" {
|
||||||
|
t.Errorf("expected resultType=vector, got %s", data.ResultType)
|
||||||
|
}
|
||||||
|
if len(data.Result) != 2 {
|
||||||
|
t.Fatalf("expected 2 results, got %d", len(data.Result))
|
||||||
|
}
|
||||||
|
if data.Result[0].Metric["job"] != "prometheus" {
|
||||||
|
t.Errorf("expected job=prometheus, got %s", data.Result[0].Metric["job"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrometheusClient_QueryError(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"status": "error",
|
||||||
|
"errorType": "bad_data",
|
||||||
|
"error": "invalid expression"
|
||||||
|
}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewPrometheusClient(srv.URL)
|
||||||
|
_, err := client.Query(context.Background(), "invalid{", time.Time{})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error, got nil")
|
||||||
|
}
|
||||||
|
if !contains(err.Error(), "invalid expression") {
|
||||||
|
t.Errorf("expected error to contain 'invalid expression', got: %s", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrometheusClient_LabelValues(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/v1/label/__name__/values" {
|
||||||
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"status": "success",
|
||||||
|
"data": ["up", "node_cpu_seconds_total", "prometheus_build_info"]
|
||||||
|
}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewPrometheusClient(srv.URL)
|
||||||
|
values, err := client.LabelValues(context.Background(), "__name__")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(values) != 3 {
|
||||||
|
t.Fatalf("expected 3 values, got %d", len(values))
|
||||||
|
}
|
||||||
|
if values[0] != "up" {
|
||||||
|
t.Errorf("expected first value=up, got %s", values[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrometheusClient_Metadata(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/v1/metadata" {
|
||||||
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"status": "success",
|
||||||
|
"data": {
|
||||||
|
"up": [{"type": "gauge", "help": "Whether the target is up.", "unit": ""}],
|
||||||
|
"node_cpu_seconds_total": [{"type": "counter", "help": "CPU seconds spent.", "unit": "seconds"}]
|
||||||
|
}
|
||||||
|
}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewPrometheusClient(srv.URL)
|
||||||
|
metadata, err := client.Metadata(context.Background(), "")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(metadata) != 2 {
|
||||||
|
t.Fatalf("expected 2 metrics, got %d", len(metadata))
|
||||||
|
}
|
||||||
|
if metadata["up"][0].Type != "gauge" {
|
||||||
|
t.Errorf("expected up type=gauge, got %s", metadata["up"][0].Type)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrometheusClient_Targets(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/v1/targets" {
|
||||||
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"status": "success",
|
||||||
|
"data": {
|
||||||
|
"activeTargets": [
|
||||||
|
{
|
||||||
|
"labels": {"instance": "localhost:9090", "job": "prometheus"},
|
||||||
|
"scrapePool": "prometheus",
|
||||||
|
"scrapeUrl": "http://localhost:9090/metrics",
|
||||||
|
"globalUrl": "http://localhost:9090/metrics",
|
||||||
|
"lastError": "",
|
||||||
|
"lastScrape": "2024-01-01T00:00:00Z",
|
||||||
|
"lastScrapeDuration": 0.01,
|
||||||
|
"health": "up",
|
||||||
|
"scrapeInterval": "15s",
|
||||||
|
"scrapeTimeout": "10s"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"droppedTargets": []
|
||||||
|
}
|
||||||
|
}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewPrometheusClient(srv.URL)
|
||||||
|
data, err := client.Targets(context.Background())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(data.ActiveTargets) != 1 {
|
||||||
|
t.Fatalf("expected 1 active target, got %d", len(data.ActiveTargets))
|
||||||
|
}
|
||||||
|
if data.ActiveTargets[0].Health != "up" {
|
||||||
|
t.Errorf("expected health=up, got %s", data.ActiveTargets[0].Health)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrometheusClient_HTTPError(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusInternalServerError)
|
||||||
|
_, _ = w.Write([]byte("internal error"))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewPrometheusClient(srv.URL)
|
||||||
|
_, err := client.Query(context.Background(), "up", time.Time{})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error, got nil")
|
||||||
|
}
|
||||||
|
if !contains(err.Error(), "500") {
|
||||||
|
t.Errorf("expected error to contain status code, got: %s", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func contains(s, substr string) bool {
|
||||||
|
return len(s) >= len(substr) && searchString(s, substr)
|
||||||
|
}
|
||||||
|
|
||||||
|
func searchString(s, substr string) bool {
|
||||||
|
for i := 0; i <= len(s)-len(substr); i++ {
|
||||||
|
if s[i:i+len(substr)] == substr {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
120
internal/monitoring/types.go
Normal file
120
internal/monitoring/types.go
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
package monitoring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Prometheus API response types
|
||||||
|
|
||||||
|
// PromResponse is the standard Prometheus API response envelope.
|
||||||
|
type PromResponse struct {
|
||||||
|
Status string `json:"status"`
|
||||||
|
Data json.RawMessage `json:"data,omitempty"`
|
||||||
|
ErrorType string `json:"errorType,omitempty"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// PromQueryData represents the data field for query results.
|
||||||
|
type PromQueryData struct {
|
||||||
|
ResultType string `json:"resultType"`
|
||||||
|
Result []PromInstantVector `json:"result"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// PromInstantVector represents a single instant vector result.
|
||||||
|
type PromInstantVector struct {
|
||||||
|
Metric map[string]string `json:"metric"`
|
||||||
|
Value [2]interface{} `json:"value"` // [timestamp, value_string]
|
||||||
|
}
|
||||||
|
|
||||||
|
// PromScalar represents a scalar query result.
|
||||||
|
type PromScalar [2]interface{} // [timestamp, value_string]
|
||||||
|
|
||||||
|
// PromMetadata represents metadata for a single metric.
|
||||||
|
type PromMetadata struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
Help string `json:"help"`
|
||||||
|
Unit string `json:"unit"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// PromTarget represents a single scrape target.
|
||||||
|
type PromTarget struct {
|
||||||
|
DiscoveredLabels map[string]string `json:"discoveredLabels"`
|
||||||
|
Labels map[string]string `json:"labels"`
|
||||||
|
ScrapePool string `json:"scrapePool"`
|
||||||
|
ScrapeURL string `json:"scrapeUrl"`
|
||||||
|
GlobalURL string `json:"globalUrl"`
|
||||||
|
LastError string `json:"lastError"`
|
||||||
|
LastScrape time.Time `json:"lastScrape"`
|
||||||
|
LastScrapeDuration float64 `json:"lastScrapeDuration"`
|
||||||
|
Health string `json:"health"`
|
||||||
|
ScrapeInterval string `json:"scrapeInterval"`
|
||||||
|
ScrapeTimeout string `json:"scrapeTimeout"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// PromTargetsData represents the data field for targets results.
|
||||||
|
type PromTargetsData struct {
|
||||||
|
ActiveTargets []PromTarget `json:"activeTargets"`
|
||||||
|
DroppedTargets []PromTarget `json:"droppedTargets"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Alertmanager API response types
|
||||||
|
|
||||||
|
// Alert represents an alert from the Alertmanager API v2.
|
||||||
|
type Alert struct {
|
||||||
|
Annotations map[string]string `json:"annotations"`
|
||||||
|
EndsAt time.Time `json:"endsAt"`
|
||||||
|
Fingerprint string `json:"fingerprint"`
|
||||||
|
Receivers []AlertReceiver `json:"receivers"`
|
||||||
|
StartsAt time.Time `json:"startsAt"`
|
||||||
|
Status AlertStatus `json:"status"`
|
||||||
|
UpdatedAt time.Time `json:"updatedAt"`
|
||||||
|
GeneratorURL string `json:"generatorURL"`
|
||||||
|
Labels map[string]string `json:"labels"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// AlertReceiver represents an alert receiver.
|
||||||
|
type AlertReceiver struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// AlertStatus represents the status of an alert.
|
||||||
|
type AlertStatus struct {
|
||||||
|
InhibitedBy []string `json:"inhibitedBy"`
|
||||||
|
SilencedBy []string `json:"silencedBy"`
|
||||||
|
State string `json:"state"` // "active", "suppressed", "unprocessed"
|
||||||
|
}
|
||||||
|
|
||||||
|
// AlertFilters contains filters for listing alerts.
|
||||||
|
type AlertFilters struct {
|
||||||
|
Active *bool
|
||||||
|
Silenced *bool
|
||||||
|
Inhibited *bool
|
||||||
|
Unprocessed *bool
|
||||||
|
Filter []string // PromQL-style label matchers, e.g. {severity="critical"}
|
||||||
|
Receiver string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Silence represents a silence from the Alertmanager API v2.
|
||||||
|
type Silence struct {
|
||||||
|
ID string `json:"id,omitempty"`
|
||||||
|
Matchers []Matcher `json:"matchers"`
|
||||||
|
StartsAt time.Time `json:"startsAt"`
|
||||||
|
EndsAt time.Time `json:"endsAt"`
|
||||||
|
CreatedBy string `json:"createdBy"`
|
||||||
|
Comment string `json:"comment"`
|
||||||
|
Status *SilenceStatus `json:"status,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// SilenceStatus represents the status of a silence.
|
||||||
|
type SilenceStatus struct {
|
||||||
|
State string `json:"state"` // "active", "pending", "expired"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Matcher represents a label matcher for silences.
|
||||||
|
type Matcher struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Value string `json:"value"`
|
||||||
|
IsRegex bool `json:"isRegex"`
|
||||||
|
IsEqual *bool `json:"isEqual,omitempty"`
|
||||||
|
}
|
||||||
139
nix/lab-monitoring-module.nix
Normal file
139
nix/lab-monitoring-module.nix
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
{ config, lib, pkgs, ... }:
|
||||||
|
|
||||||
|
let
|
||||||
|
cfg = config.services.lab-monitoring;
|
||||||
|
|
||||||
|
mkHttpFlags = httpCfg: lib.concatStringsSep " " ([
|
||||||
|
"--transport http"
|
||||||
|
"--http-address '${httpCfg.address}'"
|
||||||
|
"--http-endpoint '${httpCfg.endpoint}'"
|
||||||
|
"--session-ttl '${httpCfg.sessionTTL}'"
|
||||||
|
] ++ lib.optionals (httpCfg.allowedOrigins != []) (
|
||||||
|
map (origin: "--allowed-origins '${origin}'") httpCfg.allowedOrigins
|
||||||
|
) ++ lib.optionals httpCfg.tls.enable [
|
||||||
|
"--tls-cert '${httpCfg.tls.certFile}'"
|
||||||
|
"--tls-key '${httpCfg.tls.keyFile}'"
|
||||||
|
]);
|
||||||
|
in
|
||||||
|
{
|
||||||
|
options.services.lab-monitoring = {
|
||||||
|
enable = lib.mkEnableOption "Lab Monitoring MCP server";
|
||||||
|
|
||||||
|
package = lib.mkPackageOption pkgs "lab-monitoring" { };
|
||||||
|
|
||||||
|
prometheusUrl = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "http://localhost:9090";
|
||||||
|
description = "Prometheus base URL.";
|
||||||
|
};
|
||||||
|
|
||||||
|
alertmanagerUrl = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "http://localhost:9093";
|
||||||
|
description = "Alertmanager base URL.";
|
||||||
|
};
|
||||||
|
|
||||||
|
http = {
|
||||||
|
address = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "127.0.0.1:8084";
|
||||||
|
description = "HTTP listen address for the MCP server.";
|
||||||
|
};
|
||||||
|
|
||||||
|
endpoint = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "/mcp";
|
||||||
|
description = "HTTP endpoint path for MCP requests.";
|
||||||
|
};
|
||||||
|
|
||||||
|
allowedOrigins = lib.mkOption {
|
||||||
|
type = lib.types.listOf lib.types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Allowed Origin headers for CORS.";
|
||||||
|
};
|
||||||
|
|
||||||
|
sessionTTL = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "30m";
|
||||||
|
description = "Session TTL for HTTP transport.";
|
||||||
|
};
|
||||||
|
|
||||||
|
tls = {
|
||||||
|
enable = lib.mkEnableOption "TLS for HTTP transport";
|
||||||
|
|
||||||
|
certFile = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.path;
|
||||||
|
default = null;
|
||||||
|
description = "Path to TLS certificate file.";
|
||||||
|
};
|
||||||
|
|
||||||
|
keyFile = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.path;
|
||||||
|
default = null;
|
||||||
|
description = "Path to TLS private key file.";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
openFirewall = lib.mkOption {
|
||||||
|
type = lib.types.bool;
|
||||||
|
default = false;
|
||||||
|
description = "Whether to open the firewall for the MCP HTTP server.";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
config = lib.mkIf cfg.enable {
|
||||||
|
assertions = [
|
||||||
|
{
|
||||||
|
assertion = !cfg.http.tls.enable || (cfg.http.tls.certFile != null && cfg.http.tls.keyFile != null);
|
||||||
|
message = "services.lab-monitoring.http.tls: both certFile and keyFile must be set when TLS is enabled";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
systemd.services.lab-monitoring = {
|
||||||
|
description = "Lab Monitoring MCP Server";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
|
||||||
|
environment = {
|
||||||
|
PROMETHEUS_URL = cfg.prometheusUrl;
|
||||||
|
ALERTMANAGER_URL = cfg.alertmanagerUrl;
|
||||||
|
};
|
||||||
|
|
||||||
|
script = let
|
||||||
|
httpFlags = mkHttpFlags cfg.http;
|
||||||
|
in ''
|
||||||
|
exec ${cfg.package}/bin/lab-monitoring serve ${httpFlags}
|
||||||
|
'';
|
||||||
|
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
DynamicUser = true;
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = "5s";
|
||||||
|
|
||||||
|
# Hardening
|
||||||
|
NoNewPrivileges = true;
|
||||||
|
ProtectSystem = "strict";
|
||||||
|
ProtectHome = true;
|
||||||
|
PrivateTmp = true;
|
||||||
|
PrivateDevices = true;
|
||||||
|
ProtectKernelTunables = true;
|
||||||
|
ProtectKernelModules = true;
|
||||||
|
ProtectControlGroups = true;
|
||||||
|
RestrictNamespaces = true;
|
||||||
|
RestrictRealtime = true;
|
||||||
|
RestrictSUIDSGID = true;
|
||||||
|
MemoryDenyWriteExecute = true;
|
||||||
|
LockPersonality = true;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.firewall = lib.mkIf cfg.openFirewall (let
|
||||||
|
addressParts = lib.splitString ":" cfg.http.address;
|
||||||
|
port = lib.toInt (lib.last addressParts);
|
||||||
|
in {
|
||||||
|
allowedTCPPorts = [ port ];
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
buildGoModule {
|
buildGoModule {
|
||||||
inherit pname src;
|
inherit pname src;
|
||||||
version = "0.2.0";
|
version = "0.2.1";
|
||||||
|
|
||||||
vendorHash = "sha256-D0KIxQC9ctIAaHBFTvkhBE06uOZwDUcIw8471Ug2doY=";
|
vendorHash = "sha256-D0KIxQC9ctIAaHBFTvkhBE06uOZwDUcIw8471Ug2doY=";
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user