Add InstructionsFunc callback to ServerConfig, called during each initialize handshake to generate dynamic instructions. The lab-monitoring server uses this to query Alertmanager and include a count of active non-silenced alerts, so the LLM can proactively inform the user. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
375 lines
8.2 KiB
Go
375 lines
8.2 KiB
Go
package main
|
|
|
|
import (
	"context"
	"fmt"
	"log"
	"os"
	"os/signal"
	"sort"
	"strings"
	"syscall"
	"time"

	"github.com/urfave/cli/v2"

	"git.t-juice.club/torjus/labmcp/internal/mcp"
	"git.t-juice.club/torjus/labmcp/internal/monitoring"
)
|
|
|
|
// version is the application version string; main passes it to cli.App.Version.
const version = "0.1.0"
|
|
|
|
func main() {
|
|
app := &cli.App{
|
|
Name: "lab-monitoring",
|
|
Usage: "MCP server for Prometheus and Alertmanager monitoring",
|
|
Version: version,
|
|
Flags: []cli.Flag{
|
|
&cli.StringFlag{
|
|
Name: "prometheus-url",
|
|
Usage: "Prometheus base URL",
|
|
EnvVars: []string{"PROMETHEUS_URL"},
|
|
Value: "http://localhost:9090",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "alertmanager-url",
|
|
Usage: "Alertmanager base URL",
|
|
EnvVars: []string{"ALERTMANAGER_URL"},
|
|
Value: "http://localhost:9093",
|
|
},
|
|
},
|
|
Commands: []*cli.Command{
|
|
serveCommand(),
|
|
alertsCommand(),
|
|
queryCommand(),
|
|
targetsCommand(),
|
|
metricsCommand(),
|
|
},
|
|
}
|
|
|
|
if err := app.Run(os.Args); err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
}
|
|
|
|
func serveCommand() *cli.Command {
|
|
return &cli.Command{
|
|
Name: "serve",
|
|
Usage: "Run MCP server for lab monitoring",
|
|
Flags: []cli.Flag{
|
|
&cli.StringFlag{
|
|
Name: "transport",
|
|
Aliases: []string{"t"},
|
|
Usage: "Transport type: 'stdio' or 'http'",
|
|
Value: "stdio",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "http-address",
|
|
Usage: "HTTP listen address",
|
|
Value: "127.0.0.1:8084",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "http-endpoint",
|
|
Usage: "HTTP endpoint path",
|
|
Value: "/mcp",
|
|
},
|
|
&cli.StringSliceFlag{
|
|
Name: "allowed-origins",
|
|
Usage: "Allowed Origin headers for CORS",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "tls-cert",
|
|
Usage: "TLS certificate file",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "tls-key",
|
|
Usage: "TLS key file",
|
|
},
|
|
&cli.DurationFlag{
|
|
Name: "session-ttl",
|
|
Usage: "Session TTL for HTTP transport",
|
|
Value: 30 * time.Minute,
|
|
},
|
|
},
|
|
Action: func(c *cli.Context) error {
|
|
return runServe(c)
|
|
},
|
|
}
|
|
}
|
|
|
|
func alertsCommand() *cli.Command {
|
|
return &cli.Command{
|
|
Name: "alerts",
|
|
Usage: "List alerts from Alertmanager",
|
|
Flags: []cli.Flag{
|
|
&cli.StringFlag{
|
|
Name: "state",
|
|
Usage: "Filter by state: active, suppressed, unprocessed",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "severity",
|
|
Usage: "Filter by severity label",
|
|
},
|
|
},
|
|
Action: func(c *cli.Context) error {
|
|
return runAlerts(c)
|
|
},
|
|
}
|
|
}
|
|
|
|
func queryCommand() *cli.Command {
|
|
return &cli.Command{
|
|
Name: "query",
|
|
Usage: "Execute an instant PromQL query",
|
|
ArgsUsage: "<promql>",
|
|
Action: func(c *cli.Context) error {
|
|
if c.NArg() < 1 {
|
|
return fmt.Errorf("promql expression required")
|
|
}
|
|
return runQuery(c, c.Args().First())
|
|
},
|
|
}
|
|
}
|
|
|
|
func targetsCommand() *cli.Command {
|
|
return &cli.Command{
|
|
Name: "targets",
|
|
Usage: "List scrape targets",
|
|
Action: func(c *cli.Context) error {
|
|
return runTargets(c)
|
|
},
|
|
}
|
|
}
|
|
|
|
func metricsCommand() *cli.Command {
|
|
return &cli.Command{
|
|
Name: "metrics",
|
|
Usage: "Search metric names",
|
|
ArgsUsage: "<search>",
|
|
Flags: []cli.Flag{
|
|
&cli.IntFlag{
|
|
Name: "limit",
|
|
Aliases: []string{"n"},
|
|
Usage: "Maximum number of results",
|
|
Value: 50,
|
|
},
|
|
},
|
|
Action: func(c *cli.Context) error {
|
|
query := ""
|
|
if c.NArg() > 0 {
|
|
query = c.Args().First()
|
|
}
|
|
return runMetrics(c, query)
|
|
},
|
|
}
|
|
}
|
|
|
|
func runServe(c *cli.Context) error {
|
|
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
|
defer cancel()
|
|
|
|
logger := log.New(os.Stderr, "[mcp] ", log.LstdFlags)
|
|
config := mcp.DefaultMonitoringConfig()
|
|
|
|
prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
|
|
am := monitoring.NewAlertmanagerClient(c.String("alertmanager-url"))
|
|
|
|
config.InstructionsFunc = func() string {
|
|
return monitoring.AlertSummary(am)
|
|
}
|
|
|
|
server := mcp.NewGenericServer(logger, config)
|
|
monitoring.RegisterHandlers(server, prom, am)
|
|
|
|
transport := c.String("transport")
|
|
switch transport {
|
|
case "stdio":
|
|
logger.Println("Starting lab-monitoring MCP server on stdio...")
|
|
return server.Run(ctx, os.Stdin, os.Stdout)
|
|
|
|
case "http":
|
|
httpConfig := mcp.HTTPConfig{
|
|
Address: c.String("http-address"),
|
|
Endpoint: c.String("http-endpoint"),
|
|
AllowedOrigins: c.StringSlice("allowed-origins"),
|
|
SessionTTL: c.Duration("session-ttl"),
|
|
TLSCertFile: c.String("tls-cert"),
|
|
TLSKeyFile: c.String("tls-key"),
|
|
}
|
|
httpTransport := mcp.NewHTTPTransport(server, httpConfig)
|
|
return httpTransport.Run(ctx)
|
|
|
|
default:
|
|
return fmt.Errorf("unknown transport: %s (use 'stdio' or 'http')", transport)
|
|
}
|
|
}
|
|
|
|
func runAlerts(c *cli.Context) error {
|
|
ctx := context.Background()
|
|
am := monitoring.NewAlertmanagerClient(c.String("alertmanager-url"))
|
|
|
|
filters := monitoring.AlertFilters{}
|
|
if state := c.String("state"); state != "" {
|
|
switch state {
|
|
case "active":
|
|
active := true
|
|
filters.Active = &active
|
|
silenced := false
|
|
filters.Silenced = &silenced
|
|
inhibited := false
|
|
filters.Inhibited = &inhibited
|
|
case "suppressed":
|
|
active := false
|
|
filters.Active = &active
|
|
case "unprocessed":
|
|
unprocessed := true
|
|
filters.Unprocessed = &unprocessed
|
|
}
|
|
}
|
|
if severity := c.String("severity"); severity != "" {
|
|
filters.Filter = append(filters.Filter, fmt.Sprintf(`severity="%s"`, severity))
|
|
}
|
|
|
|
alerts, err := am.ListAlerts(ctx, filters)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to list alerts: %w", err)
|
|
}
|
|
|
|
if len(alerts) == 0 {
|
|
fmt.Println("No alerts found.")
|
|
return nil
|
|
}
|
|
|
|
for _, a := range alerts {
|
|
state := a.Status.State
|
|
severity := a.Labels["severity"]
|
|
name := a.Labels["alertname"]
|
|
fmt.Printf("[%s] %s (severity=%s, fingerprint=%s)\n", state, name, severity, a.Fingerprint)
|
|
for k, v := range a.Annotations {
|
|
fmt.Printf(" %s: %s\n", k, v)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func runQuery(c *cli.Context, promql string) error {
|
|
ctx := context.Background()
|
|
prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
|
|
|
|
data, err := prom.Query(ctx, promql, time.Time{})
|
|
if err != nil {
|
|
return fmt.Errorf("query failed: %w", err)
|
|
}
|
|
|
|
for _, r := range data.Result {
|
|
labels := ""
|
|
for k, v := range r.Metric {
|
|
if labels != "" {
|
|
labels += ", "
|
|
}
|
|
labels += fmt.Sprintf("%s=%q", k, v)
|
|
}
|
|
value := ""
|
|
if len(r.Value) >= 2 {
|
|
if v, ok := r.Value[1].(string); ok {
|
|
value = v
|
|
}
|
|
}
|
|
fmt.Printf("{%s} %s\n", labels, value)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func runTargets(c *cli.Context) error {
|
|
ctx := context.Background()
|
|
prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
|
|
|
|
data, err := prom.Targets(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to fetch targets: %w", err)
|
|
}
|
|
|
|
if len(data.ActiveTargets) == 0 {
|
|
fmt.Println("No active targets.")
|
|
return nil
|
|
}
|
|
|
|
for _, t := range data.ActiveTargets {
|
|
job := t.Labels["job"]
|
|
instance := t.Labels["instance"]
|
|
fmt.Printf("[%s] %s/%s (last scrape: %s, duration: %.3fs)\n",
|
|
t.Health, job, instance, t.LastScrape.Format("15:04:05"), t.LastScrapeDuration)
|
|
if t.LastError != "" {
|
|
fmt.Printf(" error: %s\n", t.LastError)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func runMetrics(c *cli.Context, query string) error {
|
|
ctx := context.Background()
|
|
prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
|
|
|
|
names, err := prom.LabelValues(ctx, "__name__")
|
|
if err != nil {
|
|
return fmt.Errorf("failed to fetch metric names: %w", err)
|
|
}
|
|
|
|
limit := c.Int("limit")
|
|
count := 0
|
|
for _, name := range names {
|
|
if query != "" {
|
|
// Simple case-insensitive substring match
|
|
if !containsIgnoreCase(name, query) {
|
|
continue
|
|
}
|
|
}
|
|
fmt.Println(name)
|
|
count++
|
|
if count >= limit {
|
|
fmt.Printf("... (showing %d of matching metrics, use --limit to see more)\n", limit)
|
|
break
|
|
}
|
|
}
|
|
|
|
if count == 0 {
|
|
fmt.Printf("No metrics found matching '%s'\n", query)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// containsIgnoreCase reports whether substr occurs within s when letter case
// is ignored. Both strings are lower-cased with strings.ToLower before the
// search, so non-ASCII letters are folded as well as ASCII ones.
// An empty substr is contained in every s, including the empty string.
func containsIgnoreCase(s, substr string) bool {
	return strings.Contains(strings.ToLower(s), strings.ToLower(substr))
}
|