feat: add lab-monitoring MCP server for Prometheus and Alertmanager

New MCP server that queries live Prometheus and Alertmanager HTTP APIs
with 8 tools: list_alerts, get_alert, search_metrics, get_metric_metadata,
query (PromQL), list_targets, list_silences, and create_silence.

Extends the MCP core with ModeCustom and NewGenericServer for servers
that don't require a database. Includes CLI with direct commands
(alerts, query, targets, metrics), NixOS module, and comprehensive
httptest-based tests.

Bumps existing binaries to 0.2.1 due to shared internal/mcp change.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-04 23:11:53 +01:00
parent 0bd4ed778a
commit 1755364bba
19 changed files with 2567 additions and 22 deletions

View File

@@ -20,7 +20,7 @@ import (
const (
defaultDatabase = "sqlite://hm-options.db"
version = "0.2.0"
version = "0.2.1"
)
func main() {

369
cmd/lab-monitoring/main.go Normal file
View File

@@ -0,0 +1,369 @@
package main
import (
	"context"
	"fmt"
	"log"
	"os"
	"os/signal"
	"sort"
	"strings"
	"syscall"
	"time"

	"github.com/urfave/cli/v2"

	"git.t-juice.club/torjus/labmcp/internal/mcp"
	"git.t-juice.club/torjus/labmcp/internal/monitoring"
)
// version is the lab-monitoring release string; main assigns it to the
// cli.App Version field.
const version = "0.1.0"
// main assembles the lab-monitoring CLI and runs it against os.Args.
//
// Two global flags select the backends: --prometheus-url (env PROMETHEUS_URL)
// and --alertmanager-url (env ALERTMANAGER_URL). Any error returned by the
// app terminates the process through log.Fatal.
func main() {
	globalFlags := []cli.Flag{
		&cli.StringFlag{
			Name:    "prometheus-url",
			Usage:   "Prometheus base URL",
			EnvVars: []string{"PROMETHEUS_URL"},
			Value:   "http://localhost:9090",
		},
		&cli.StringFlag{
			Name:    "alertmanager-url",
			Usage:   "Alertmanager base URL",
			EnvVars: []string{"ALERTMANAGER_URL"},
			Value:   "http://localhost:9093",
		},
	}

	subcommands := []*cli.Command{
		serveCommand(),
		alertsCommand(),
		queryCommand(),
		targetsCommand(),
		metricsCommand(),
	}

	app := &cli.App{
		Name:     "lab-monitoring",
		Usage:    "MCP server for Prometheus and Alertmanager monitoring",
		Version:  version,
		Flags:    globalFlags,
		Commands: subcommands,
	}

	err := app.Run(os.Args)
	if err != nil {
		log.Fatal(err)
	}
}
// serveCommand builds the "serve" subcommand, which starts the MCP server.
//
// Its flags choose the transport (stdio by default, or http) and configure
// the HTTP transport: listen address, endpoint path, allowed origins, TLS
// certificate/key files and session TTL. The action delegates to runServe.
func serveCommand() *cli.Command {
	serveFlags := []cli.Flag{
		&cli.StringFlag{
			Name:    "transport",
			Aliases: []string{"t"},
			Usage:   "Transport type: 'stdio' or 'http'",
			Value:   "stdio",
		},
		&cli.StringFlag{
			Name:  "http-address",
			Usage: "HTTP listen address",
			Value: "127.0.0.1:8084",
		},
		&cli.StringFlag{
			Name:  "http-endpoint",
			Usage: "HTTP endpoint path",
			Value: "/mcp",
		},
		&cli.StringSliceFlag{
			Name:  "allowed-origins",
			Usage: "Allowed Origin headers for CORS",
		},
		&cli.StringFlag{
			Name:  "tls-cert",
			Usage: "TLS certificate file",
		},
		&cli.StringFlag{
			Name:  "tls-key",
			Usage: "TLS key file",
		},
		&cli.DurationFlag{
			Name:  "session-ttl",
			Usage: "Session TTL for HTTP transport",
			Value: 30 * time.Minute,
		},
	}

	return &cli.Command{
		Name:   "serve",
		Usage:  "Run MCP server for lab monitoring",
		Flags:  serveFlags,
		Action: runServe,
	}
}
// alertsCommand builds the "alerts" subcommand, which lists Alertmanager
// alerts. The optional --state and --severity flags narrow the listing; the
// action delegates to runAlerts.
func alertsCommand() *cli.Command {
	stateFlag := &cli.StringFlag{
		Name:  "state",
		Usage: "Filter by state: active, suppressed, unprocessed",
	}
	severityFlag := &cli.StringFlag{
		Name:  "severity",
		Usage: "Filter by severity label",
	}

	return &cli.Command{
		Name:   "alerts",
		Usage:  "List alerts from Alertmanager",
		Flags:  []cli.Flag{stateFlag, severityFlag},
		Action: runAlerts,
	}
}
// queryCommand builds the "query" subcommand, which runs an instant PromQL
// query. The expression is taken from the first positional argument; when no
// argument is given the action returns an error instead of calling runQuery.
func queryCommand() *cli.Command {
	action := func(c *cli.Context) error {
		if c.NArg() >= 1 {
			return runQuery(c, c.Args().First())
		}
		return fmt.Errorf("promql expression required")
	}

	return &cli.Command{
		Name:      "query",
		Usage:     "Execute an instant PromQL query",
		ArgsUsage: "<promql>",
		Action:    action,
	}
}
// targetsCommand builds the "targets" subcommand, which lists scrape targets
// by delegating to runTargets. It takes no flags or arguments of its own.
func targetsCommand() *cli.Command {
	cmd := &cli.Command{
		Name:   "targets",
		Usage:  "List scrape targets",
		Action: runTargets,
	}
	return cmd
}
// metricsCommand builds the "metrics" subcommand, which searches metric
// names. The optional first positional argument is the search term (empty
// when omitted) and --limit/-n caps the number of results (default 50). The
// action delegates to runMetrics.
func metricsCommand() *cli.Command {
	limitFlag := &cli.IntFlag{
		Name:    "limit",
		Aliases: []string{"n"},
		Usage:   "Maximum number of results",
		Value:   50,
	}

	action := func(c *cli.Context) error {
		var search string
		if c.NArg() > 0 {
			search = c.Args().First()
		}
		return runMetrics(c, search)
	}

	return &cli.Command{
		Name:      "metrics",
		Usage:     "Search metric names",
		ArgsUsage: "<search>",
		Flags:     []cli.Flag{limitFlag},
		Action:    action,
	}
}
// runServe starts the lab-monitoring MCP server on the transport selected by
// the --transport flag.
//
// It creates a generic MCP server with the default monitoring config,
// registers the monitoring handlers backed by Prometheus and Alertmanager
// clients built from the global URL flags, and then runs either over
// stdin/stdout ("stdio") or over HTTP ("http"). The context passed to the
// transport is cancelled on SIGINT or SIGTERM. Any other transport value
// yields an error.
func runServe(c *cli.Context) error {
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	// Log to stderr; in stdio mode stdout is handed to the server itself.
	logger := log.New(os.Stderr, "[mcp] ", log.LstdFlags)

	server := mcp.NewGenericServer(logger, mcp.DefaultMonitoringConfig())
	monitoring.RegisterHandlers(
		server,
		monitoring.NewPrometheusClient(c.String("prometheus-url")),
		monitoring.NewAlertmanagerClient(c.String("alertmanager-url")),
	)

	transport := c.String("transport")

	if transport == "stdio" {
		logger.Println("Starting lab-monitoring MCP server on stdio...")
		return server.Run(ctx, os.Stdin, os.Stdout)
	}

	if transport == "http" {
		return mcp.NewHTTPTransport(server, mcp.HTTPConfig{
			Address:        c.String("http-address"),
			Endpoint:       c.String("http-endpoint"),
			AllowedOrigins: c.StringSlice("allowed-origins"),
			SessionTTL:     c.Duration("session-ttl"),
			TLSCertFile:    c.String("tls-cert"),
			TLSKeyFile:     c.String("tls-key"),
		}).Run(ctx)
	}

	return fmt.Errorf("unknown transport: %s (use 'stdio' or 'http')", transport)
}
// runAlerts lists alerts from Alertmanager and prints one summary line per
// alert followed by its annotations.
//
// The --state flag maps onto the Alertmanager filter booleans:
//   - "active":      active=true, silenced=false, inhibited=false
//   - "suppressed":  active=false
//   - "unprocessed": unprocessed=true
//
// Any other non-empty state is rejected with an error rather than being
// silently ignored (which would list every alert as if no filter was given).
// The --severity flag adds a `severity="<value>"` label matcher.
//
// Annotations are printed in sorted order so the output is stable between
// runs; Go map iteration order is otherwise random.
func runAlerts(c *cli.Context) error {
	ctx := context.Background()
	am := monitoring.NewAlertmanagerClient(c.String("alertmanager-url"))

	filters := monitoring.AlertFilters{}
	switch state := c.String("state"); state {
	case "":
		// No state filter requested.
	case "active":
		active := true
		filters.Active = &active
		silenced := false
		filters.Silenced = &silenced
		inhibited := false
		filters.Inhibited = &inhibited
	case "suppressed":
		active := false
		filters.Active = &active
	case "unprocessed":
		unprocessed := true
		filters.Unprocessed = &unprocessed
	default:
		return fmt.Errorf("unknown state: %s (use 'active', 'suppressed' or 'unprocessed')", state)
	}

	if severity := c.String("severity"); severity != "" {
		filters.Filter = append(filters.Filter, fmt.Sprintf(`severity="%s"`, severity))
	}

	alerts, err := am.ListAlerts(ctx, filters)
	if err != nil {
		return fmt.Errorf("failed to list alerts: %w", err)
	}
	if len(alerts) == 0 {
		fmt.Println("No alerts found.")
		return nil
	}

	for _, a := range alerts {
		state := a.Status.State
		severity := a.Labels["severity"]
		name := a.Labels["alertname"]
		fmt.Printf("[%s] %s (severity=%s, fingerprint=%s)\n", state, name, severity, a.Fingerprint)

		// Render each annotation first, then sort the rendered lines, so the
		// ordering does not depend on map iteration order.
		lines := make([]string, 0, len(a.Annotations))
		for k, v := range a.Annotations {
			lines = append(lines, fmt.Sprintf(" %s: %s\n", k, v))
		}
		sort.Strings(lines)
		for _, line := range lines {
			fmt.Print(line)
		}
	}
	return nil
}
// runQuery executes the instant PromQL expression promql against the
// Prometheus instance named by --prometheus-url and prints one line per
// result in the form `{label="value", ...} sample`.
//
// Label pairs are sorted before printing so the same series always renders
// identically; Go map iteration order is otherwise random. A zero time.Time
// is passed as the evaluation timestamp.
// NOTE(review): presumably the client treats the zero time as "now" —
// confirm against PrometheusClient.Query.
func runQuery(c *cli.Context, promql string) error {
	ctx := context.Background()
	prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))

	data, err := prom.Query(ctx, promql, time.Time{})
	if err != nil {
		return fmt.Errorf("query failed: %w", err)
	}

	for _, r := range data.Result {
		pairs := make([]string, 0, len(r.Metric))
		for k, v := range r.Metric {
			pairs = append(pairs, fmt.Sprintf("%s=%q", k, v))
		}
		sort.Strings(pairs)
		labels := strings.Join(pairs, ", ")

		// The sample is read from the second element of r.Value and only
		// used when it is a string; anything else prints as empty.
		value := ""
		if len(r.Value) >= 2 {
			if v, ok := r.Value[1].(string); ok {
				value = v
			}
		}
		fmt.Printf("{%s} %s\n", labels, value)
	}
	return nil
}
// runTargets fetches the scrape targets from the Prometheus instance named
// by --prometheus-url and prints one line per active target with its health,
// job/instance labels, last scrape time (HH:MM:SS) and scrape duration. A
// target's last error, when non-empty, is printed on a following line.
func runTargets(c *cli.Context) error {
	client := monitoring.NewPrometheusClient(c.String("prometheus-url"))

	data, err := client.Targets(context.Background())
	if err != nil {
		return fmt.Errorf("failed to fetch targets: %w", err)
	}

	if len(data.ActiveTargets) > 0 {
		for _, target := range data.ActiveTargets {
			scrapedAt := target.LastScrape.Format("15:04:05")
			fmt.Printf("[%s] %s/%s (last scrape: %s, duration: %.3fs)\n",
				target.Health,
				target.Labels["job"],
				target.Labels["instance"],
				scrapedAt,
				target.LastScrapeDuration,
			)
			if target.LastError == "" {
				continue
			}
			fmt.Printf(" error: %s\n", target.LastError)
		}
		return nil
	}

	fmt.Println("No active targets.")
	return nil
}
// runMetrics prints metric names from the Prometheus instance named by
// --prometheus-url, optionally filtered by a case-insensitive substring.
//
// query is the search term; an empty query matches every metric name. At
// most --limit names are printed. The truncation notice is only emitted when
// at least one further match exists beyond the limit, so a result set that
// exactly fills the limit is not reported as truncated. A non-positive
// --limit is rejected up front, before any request is made.
func runMetrics(c *cli.Context, query string) error {
	limit := c.Int("limit")
	if limit <= 0 {
		return fmt.Errorf("limit must be positive, got %d", limit)
	}

	ctx := context.Background()
	prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))

	// Metric names are the values of the __name__ label.
	names, err := prom.LabelValues(ctx, "__name__")
	if err != nil {
		return fmt.Errorf("failed to fetch metric names: %w", err)
	}

	count := 0
	for _, name := range names {
		// Simple case-insensitive substring match
		if query != "" && !containsIgnoreCase(name, query) {
			continue
		}
		// Reaching this point with the limit already filled means there is
		// at least one more match than we are allowed to show.
		if count == limit {
			fmt.Printf("... (showing %d of matching metrics, use --limit to see more)\n", limit)
			break
		}
		fmt.Println(name)
		count++
	}

	if count == 0 {
		fmt.Printf("No metrics found matching '%s'\n", query)
	}
	return nil
}
// containsIgnoreCase reports whether substr occurs within s, ignoring letter
// case. An empty substr matches any s, including the empty string.
//
// Both strings are lower-cased with strings.ToLower, so non-ASCII letters
// are folded as well as ASCII ones.
func containsIgnoreCase(s, substr string) bool {
	return strings.Contains(strings.ToLower(s), strings.ToLower(substr))
}

View File

@@ -19,7 +19,7 @@ import (
const (
defaultDatabase = "sqlite://nixos-options.db"
version = "0.2.0"
version = "0.2.1"
)
func main() {

View File

@@ -20,7 +20,7 @@ import (
const (
defaultDatabase = "sqlite://nixpkgs-search.db"
version = "0.2.0"
version = "0.2.1"
)
func main() {