// Command lab-monitoring is an MCP server and companion CLI for Prometheus,
// Alertmanager and (optionally) Loki.
package main

import (
	"context"
	"fmt"
	"log"
	"os"
	"os/signal"
	"sort"
	"strings"
	"syscall"
	"time"

	"github.com/urfave/cli/v2"

	"git.t-juice.club/torjus/labmcp/internal/mcp"
	"git.t-juice.club/torjus/labmcp/internal/monitoring"
)

// version is the version string reported by the CLI.
const version = "0.2.0"

// main builds the CLI application (global backend URL flags plus one
// sub-command per operation) and runs it against os.Args. Any error returned
// by the selected command terminates the process via log.Fatal.
func main() {
	app := &cli.App{
		Name:    "lab-monitoring",
		Usage:   "MCP server for Prometheus and Alertmanager monitoring",
		Version: version,
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:    "prometheus-url",
				Usage:   "Prometheus base URL",
				EnvVars: []string{"PROMETHEUS_URL"},
				Value:   "http://localhost:9090",
			},
			&cli.StringFlag{
				Name:    "alertmanager-url",
				Usage:   "Alertmanager base URL",
				EnvVars: []string{"ALERTMANAGER_URL"},
				Value:   "http://localhost:9093",
			},
			&cli.StringFlag{
				Name:    "loki-url",
				Usage:   "Loki base URL (optional, enables log query tools)",
				EnvVars: []string{"LOKI_URL"},
			},
		},
		Commands: []*cli.Command{
			serveCommand(),
			alertsCommand(),
			queryCommand(),
			targetsCommand(),
			metricsCommand(),
			logsCommand(),
			labelsCommand(),
		},
	}

	if err := app.Run(os.Args); err != nil {
		log.Fatal(err)
	}
}

// serveCommand returns the "serve" command, which runs the MCP server over
// either the stdio or the HTTP transport.
func serveCommand() *cli.Command {
	return &cli.Command{
		Name:  "serve",
		Usage: "Run MCP server for lab monitoring",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:    "transport",
				Aliases: []string{"t"},
				Usage:   "Transport type: 'stdio' or 'http'",
				Value:   "stdio",
			},
			&cli.StringFlag{
				Name:  "http-address",
				Usage: "HTTP listen address",
				Value: "127.0.0.1:8084",
			},
			&cli.StringFlag{
				Name:  "http-endpoint",
				Usage: "HTTP endpoint path",
				Value: "/mcp",
			},
			&cli.StringSliceFlag{
				Name:  "allowed-origins",
				Usage: "Allowed Origin headers for CORS",
			},
			&cli.StringFlag{
				Name:  "tls-cert",
				Usage: "TLS certificate file",
			},
			&cli.StringFlag{
				Name:  "tls-key",
				Usage: "TLS key file",
			},
			&cli.DurationFlag{
				Name:  "session-ttl",
				Usage: "Session TTL for HTTP transport",
				Value: 30 * time.Minute,
			},
			&cli.BoolFlag{
				Name:  "enable-silences",
				Usage: "Enable the create_silence tool (write operation, disabled by default)",
			},
		},
		Action: runServe,
	}
}

// alertsCommand returns the "alerts" command, which lists Alertmanager
// alerts, optionally filtered by state and severity.
func alertsCommand() *cli.Command {
	return &cli.Command{
		Name:  "alerts",
		Usage: "List alerts from Alertmanager",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "state",
				Usage: "Filter by state: active, suppressed, unprocessed",
			},
			&cli.StringFlag{
				Name:  "severity",
				Usage: "Filter by severity label",
			},
		},
		Action: runAlerts,
	}
}

// queryCommand returns the "query" command, which evaluates the PromQL
// expression given as the first positional argument.
func queryCommand() *cli.Command {
	return &cli.Command{
		Name:  "query",
		Usage: "Execute an instant PromQL query",
		// Name the positional argument so the help output does not fall back
		// to the generic "[arguments...]" placeholder.
		ArgsUsage: "<promql>",
		Action: func(c *cli.Context) error {
			if c.NArg() < 1 {
				return fmt.Errorf("promql expression required")
			}
			return runQuery(c, c.Args().First())
		},
	}
}

// targetsCommand returns the "targets" command, which lists Prometheus scrape
// targets.
func targetsCommand() *cli.Command {
	return &cli.Command{
		Name:   "targets",
		Usage:  "List scrape targets",
		Action: runTargets,
	}
}

// metricsCommand returns the "metrics" command, which prints metric names,
// optionally restricted to those containing the first positional argument.
func metricsCommand() *cli.Command {
	return &cli.Command{
		Name:      "metrics",
		Usage:     "Search metric names",
		ArgsUsage: "[search]",
		Flags: []cli.Flag{
			&cli.IntFlag{
				Name:    "limit",
				Aliases: []string{"n"},
				Usage:   "Maximum number of results",
				Value:   50,
			},
		},
		Action: func(c *cli.Context) error {
			// The search term is optional; an empty term matches every metric.
			query := ""
			if c.NArg() > 0 {
				query = c.Args().First()
			}
			return runMetrics(c, query)
		},
	}
}

// runServe wires the Prometheus, Alertmanager and (when --loki-url is set)
// Loki clients into a generic MCP server and runs it on the transport chosen
// by --transport. The context handed to the transport is cancelled on SIGINT
// or SIGTERM. An unrecognised transport name yields an error.
func runServe(c *cli.Context) error {
	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer cancel()

	logger := log.New(os.Stderr, "[mcp] ", log.LstdFlags)
	config := mcp.DefaultMonitoringConfig()

	prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
	am := monitoring.NewAlertmanagerClient(c.String("alertmanager-url"))

	// Loki is optional: the client stays nil unless a URL was configured.
	var loki *monitoring.LokiClient
	if lokiURL := c.String("loki-url"); lokiURL != "" {
		loki = monitoring.NewLokiClient(lokiURL)
	}

	config.InstructionsFunc = func() string {
		return monitoring.AlertSummary(am)
	}

	server := mcp.NewGenericServer(logger, config)

	opts := monitoring.HandlerOptions{
		EnableSilences: c.Bool("enable-silences"),
	}
	monitoring.RegisterHandlers(server, prom, am, loki, opts)

	transport := c.String("transport")
	switch transport {
	case "stdio":
		logger.Println("Starting lab-monitoring MCP server on stdio...")
		return server.Run(ctx, os.Stdin, os.Stdout)
	case "http":
		httpConfig := mcp.HTTPConfig{
			Address:        c.String("http-address"),
			Endpoint:       c.String("http-endpoint"),
			AllowedOrigins: c.StringSlice("allowed-origins"),
			SessionTTL:     c.Duration("session-ttl"),
			TLSCertFile:    c.String("tls-cert"),
			TLSKeyFile:     c.String("tls-key"),
		}
		httpTransport := mcp.NewHTTPTransport(server, httpConfig)
		return httpTransport.Run(ctx)
	default:
		return fmt.Errorf("unknown transport: %s (use 'stdio' or 'http')", transport)
	}
}

// runAlerts lists alerts from Alertmanager and prints one line per alert
// followed by its annotations. --state selects the active/suppressed/
// unprocessed filter and --severity adds a severity label matcher. An
// unrecognised --state value is rejected instead of being silently ignored.
func runAlerts(c *cli.Context) error {
	ctx := context.Background()
	am := monitoring.NewAlertmanagerClient(c.String("alertmanager-url"))

	filters := monitoring.AlertFilters{}
	switch state := c.String("state"); state {
	case "":
		// No state filter requested.
	case "active":
		active := true
		filters.Active = &active
		silenced := false
		filters.Silenced = &silenced
		inhibited := false
		filters.Inhibited = &inhibited
	case "suppressed":
		active := false
		filters.Active = &active
	case "unprocessed":
		unprocessed := true
		filters.Unprocessed = &unprocessed
	default:
		return fmt.Errorf("unknown state %q (use 'active', 'suppressed' or 'unprocessed')", state)
	}

	if severity := c.String("severity"); severity != "" {
		filters.Filter = append(filters.Filter, fmt.Sprintf(`severity="%s"`, severity))
	}

	alerts, err := am.ListAlerts(ctx, filters)
	if err != nil {
		return fmt.Errorf("failed to list alerts: %w", err)
	}

	if len(alerts) == 0 {
		fmt.Println("No alerts found.")
		return nil
	}

	for _, a := range alerts {
		fmt.Printf("[%s] %s (severity=%s, fingerprint=%s)\n",
			a.Status.State, a.Labels["alertname"], a.Labels["severity"], a.Fingerprint)

		// Map iteration order is random; sort the rendered lines so repeated
		// runs print annotations in the same order.
		lines := make([]string, 0, len(a.Annotations))
		for k, v := range a.Annotations {
			lines = append(lines, fmt.Sprintf(" %s: %s\n", k, v))
		}
		sort.Strings(lines)
		for _, line := range lines {
			fmt.Print(line)
		}
	}
	return nil
}

// runQuery executes promql as an instant query against Prometheus and prints
// each result as "{labels} value". Label pairs are sorted so the output is
// deterministic.
func runQuery(c *cli.Context, promql string) error {
	ctx := context.Background()
	prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))

	data, err := prom.Query(ctx, promql, time.Time{})
	if err != nil {
		return fmt.Errorf("query failed: %w", err)
	}

	for _, r := range data.Result {
		pairs := make([]string, 0, len(r.Metric))
		for k, v := range r.Metric {
			pairs = append(pairs, fmt.Sprintf("%s=%q", k, v))
		}
		sort.Strings(pairs)

		// The sample value is read from the second element of r.Value and is
		// only printed when that element is a string.
		value := ""
		if len(r.Value) >= 2 {
			if v, ok := r.Value[1].(string); ok {
				value = v
			}
		}
		fmt.Printf("{%s} %s\n", strings.Join(pairs, ", "), value)
	}
	return nil
}

// runTargets fetches the Prometheus scrape targets and prints health, job,
// instance, last scrape time and duration for each active target, followed by
// the last scrape error when there is one.
func runTargets(c *cli.Context) error {
	ctx := context.Background()
	prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))

	data, err := prom.Targets(ctx)
	if err != nil {
		return fmt.Errorf("failed to fetch targets: %w", err)
	}

	if len(data.ActiveTargets) == 0 {
		fmt.Println("No active targets.")
		return nil
	}

	for _, t := range data.ActiveTargets {
		job := t.Labels["job"]
		instance := t.Labels["instance"]
		fmt.Printf("[%s] %s/%s (last scrape: %s, duration: %.3fs)\n",
			t.Health, job, instance, t.LastScrape.Format("15:04:05"), t.LastScrapeDuration)
		if t.LastError != "" {
			fmt.Printf(" error: %s\n", t.LastError)
		}
	}
	return nil
}

// runMetrics prints the metric names known to Prometheus that contain query
// (case-insensitively); an empty query matches every name. At most --limit
// names are printed, and the truncation notice is only shown when at least
// one further match was actually left out. A non-positive --limit is
// rejected.
func runMetrics(c *cli.Context, query string) error {
	limit := c.Int("limit")
	if limit <= 0 {
		return fmt.Errorf("--limit must be a positive integer, got %d", limit)
	}

	ctx := context.Background()
	prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))

	names, err := prom.LabelValues(ctx, "__name__")
	if err != nil {
		return fmt.Errorf("failed to fetch metric names: %w", err)
	}

	count := 0
	for _, name := range names {
		// Simple case-insensitive substring match.
		if query != "" && !containsIgnoreCase(name, query) {
			continue
		}
		// Reaching this point with the quota already used means there is at
		// least one more match than we are allowed to show.
		if count == limit {
			fmt.Printf("... (showing %d of matching metrics, use --limit to see more)\n", limit)
			break
		}
		fmt.Println(name)
		count++
	}

	if count == 0 {
		if query == "" {
			fmt.Println("No metrics found.")
		} else {
			fmt.Printf("No metrics found matching '%s'\n", query)
		}
	}
	return nil
}

// logsCommand returns the "logs" command, which runs the LogQL expression
// given as the first positional argument against Loki.
func logsCommand() *cli.Command {
	return &cli.Command{
		Name:      "logs",
		Usage:     "Query logs from Loki using LogQL",
		ArgsUsage: "<logql>",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "start",
				Usage: "Start time: relative duration (e.g., '1h'), RFC3339, or Unix epoch",
				Value: "1h",
			},
			&cli.StringFlag{
				Name:  "end",
				Usage: "End time: relative duration, RFC3339, or Unix epoch",
				Value: "now",
			},
			&cli.IntFlag{
				Name:    "limit",
				Aliases: []string{"n"},
				Usage:   "Maximum number of entries",
				Value:   100,
			},
			&cli.StringFlag{
				Name:  "direction",
				Usage: "Sort order: 'backward' (newest first) or 'forward' (oldest first)",
				Value: "backward",
			},
		},
		Action: func(c *cli.Context) error {
			if c.NArg() < 1 {
				return fmt.Errorf("LogQL expression required")
			}
			return runLogs(c, c.Args().First())
		},
	}
}

// labelsCommand returns the "labels" command, which lists Loki label names,
// or the values of one label when --values is given.
func labelsCommand() *cli.Command {
	return &cli.Command{
		Name:  "labels",
		Usage: "List labels from Loki, or values for a specific label",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "values",
				Usage: "Get values for this label name instead of listing labels",
			},
		},
		Action: runLabels,
	}
}

// runLogs runs logql as a range query against Loki between --start and --end
// and prints the entries grouped by stream. It fails when no Loki URL is
// configured or when either time bound cannot be parsed. Stream labels are
// sorted so the output is deterministic.
func runLogs(c *cli.Context, logql string) error {
	lokiURL := c.String("loki-url")
	if lokiURL == "" {
		return fmt.Errorf("--loki-url or LOKI_URL is required for log queries")
	}

	ctx := context.Background()
	loki := monitoring.NewLokiClient(lokiURL)

	now := time.Now()
	start, err := parseCLITime(c.String("start"), now.Add(-time.Hour))
	if err != nil {
		return fmt.Errorf("invalid start time: %w", err)
	}
	end, err := parseCLITime(c.String("end"), now)
	if err != nil {
		return fmt.Errorf("invalid end time: %w", err)
	}

	data, err := loki.QueryRange(ctx, logql, start, end, c.Int("limit"), c.String("direction"))
	if err != nil {
		return fmt.Errorf("log query failed: %w", err)
	}

	totalEntries := 0
	for _, stream := range data.Result {
		totalEntries += len(stream.Values)
	}
	if totalEntries == 0 {
		fmt.Println("No log entries found.")
		return nil
	}

	for _, stream := range data.Result {
		// Print stream labels.
		pairs := make([]string, 0, len(stream.Stream))
		for k, v := range stream.Stream {
			pairs = append(pairs, fmt.Sprintf("%s=%q", k, v))
		}
		sort.Strings(pairs)
		fmt.Printf("--- {%s} ---\n", strings.Join(pairs, ", "))

		for _, entry := range stream.Values {
			// Each entry is read as (timestamp, line); skip anything shorter
			// rather than panicking on a malformed response.
			if len(entry) < 2 {
				continue
			}
			fmt.Printf("[%s] %s\n", formatCLITimestamp(entry[0]), entry[1])
		}
		fmt.Println()
	}
	return nil
}

// runLabels prints the Loki label names, or the values of the label named by
// --values when that flag is set. It fails when no Loki URL is configured.
func runLabels(c *cli.Context) error {
	lokiURL := c.String("loki-url")
	if lokiURL == "" {
		return fmt.Errorf("--loki-url or LOKI_URL is required for label queries")
	}

	ctx := context.Background()
	loki := monitoring.NewLokiClient(lokiURL)

	if label := c.String("values"); label != "" {
		values, err := loki.LabelValues(ctx, label)
		if err != nil {
			return fmt.Errorf("failed to list label values: %w", err)
		}
		if len(values) == 0 {
			fmt.Printf("No values found for label '%s'.\n", label)
			return nil
		}
		for _, v := range values {
			fmt.Println(v)
		}
		return nil
	}

	labels, err := loki.Labels(ctx)
	if err != nil {
		return fmt.Errorf("failed to list labels: %w", err)
	}
	if len(labels) == 0 {
		fmt.Println("No labels found.")
		return nil
	}
	for _, label := range labels {
		fmt.Println(label)
	}
	return nil
}

// parseCLITime parses a time string for CLI use. Handles "now", relative durations,
// RFC3339, and Unix epoch seconds.
//
// Inputs are tried in this order: "" (returns defaultTime), "now", a
// time.ParseDuration string interpreted as "that long ago", RFC3339, and
// finally a string of decimal digits taken as Unix seconds. Because durations
// are tried first, "0" means "now" rather than the Unix epoch. On failure the
// returned time is defaultTime and the error names the offending input.
func parseCLITime(s string, defaultTime time.Time) (time.Time, error) {
	switch s {
	case "":
		// Nothing was supplied, so the caller's default applies.
		return defaultTime, nil
	case "now":
		return time.Now(), nil
	}

	// Relative duration, e.g. "1h" means one hour before now.
	if d, err := time.ParseDuration(s); err == nil {
		return time.Now().Add(-d), nil
	}

	// Absolute RFC3339 timestamp.
	if t, err := time.Parse(time.RFC3339, s); err == nil {
		return t, nil
	}

	// Unix epoch seconds; digit strings that overflow int64 are rejected
	// instead of wrapping around to an unrelated instant.
	if epoch, ok := parseDigits(s); ok {
		return time.Unix(epoch, 0), nil
	}

	return defaultTime, fmt.Errorf("cannot parse time '%s'", s)
}

// formatCLITimestamp converts a nanosecond Unix timestamp string to a readable
// local-time string in "2006-01-02 15:04:05" layout. Input that is not a
// non-empty run of decimal digits fitting in an int64 is returned unchanged,
// so malformed timestamps stay visible instead of being rendered as a
// plausible-looking but wrong date.
func formatCLITimestamp(nsStr string) string {
	ns, ok := parseDigits(nsStr)
	if !ok {
		return nsStr
	}
	return time.Unix(0, ns).Local().Format("2006-01-02 15:04:05")
}

// containsIgnoreCase reports whether substr occurs in s, treating the ASCII
// letters A-Z and a-z as equal. Bytes outside that range are compared
// verbatim. An empty substr matches any s.
func containsIgnoreCase(s, substr string) bool {
	n := len(substr)
	for start := 0; start+n <= len(s); start++ {
		if equalFoldASCII(s[start:start+n], substr) {
			return true
		}
	}
	return false
}

// parseDigits converts a non-empty string of ASCII decimal digits to an int64.
// It reports false for empty input, for any non-digit byte, and for values
// that do not fit in an int64.
func parseDigits(s string) (int64, bool) {
	if s == "" {
		return 0, false
	}
	const maxInt64 = 1<<63 - 1

	var n int64
	for i := 0; i < len(s); i++ {
		ch := s[i]
		if ch < '0' || ch > '9' {
			return 0, false
		}
		d := int64(ch - '0')
		// Check before multiplying so the overflow never actually happens.
		if n > (maxInt64-d)/10 {
			return 0, false
		}
		n = n*10 + d
	}
	return n, true
}

// equalFoldASCII reports whether a and b are equal once ASCII upper-case
// letters are mapped to lower case.
func equalFoldASCII(a, b string) bool {
	if len(a) != len(b) {
		return false
	}
	for i := 0; i < len(a); i++ {
		if lowerASCII(a[i]) != lowerASCII(b[i]) {
			return false
		}
	}
	return true
}

// lowerASCII maps 'A'-'Z' to 'a'-'z' and returns every other byte unchanged.
func lowerASCII(b byte) byte {
	if b >= 'A' && b <= 'Z' {
		return b + ('a' - 'A')
	}
	return b
}