This repository has been archived on 2026-03-10. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
labmcp/cmd/lab-monitoring/main.go
Torjus Håkestad 859e35ab5c feat: add Loki log query support to lab-monitoring
Add 3 opt-in Loki tools (query_logs, list_labels, list_label_values)
that are registered when LOKI_URL is configured. Includes Loki HTTP
client, CLI commands (logs, labels), NixOS module option, formatting,
and tests.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 20:55:39 +01:00

587 lines
13 KiB
Go

package main
import (
	"context"
	"fmt"
	"log"
	"os"
	"os/signal"
	"strconv"
	"syscall"
	"time"

	"github.com/urfave/cli/v2"

	"git.t-juice.club/torjus/labmcp/internal/mcp"
	"git.t-juice.club/torjus/labmcp/internal/monitoring"
)
// version is the application version string; main passes it to cli.App.Version.
const version = "0.2.0"
// main assembles the lab-monitoring CLI (global connection flags plus all
// subcommands) and runs it against the process arguments. Any error returned
// by the application is fatal.
func main() {
	// Global flags: base URLs of the backing services. Each can also be
	// supplied through the listed environment variable.
	globalFlags := []cli.Flag{
		&cli.StringFlag{
			Name:    "prometheus-url",
			Usage:   "Prometheus base URL",
			EnvVars: []string{"PROMETHEUS_URL"},
			Value:   "http://localhost:9090",
		},
		&cli.StringFlag{
			Name:    "alertmanager-url",
			Usage:   "Alertmanager base URL",
			EnvVars: []string{"ALERTMANAGER_URL"},
			Value:   "http://localhost:9093",
		},
		// No default value: Loki support is opt-in.
		&cli.StringFlag{
			Name:    "loki-url",
			Usage:   "Loki base URL (optional, enables log query tools)",
			EnvVars: []string{"LOKI_URL"},
		},
	}

	subcommands := []*cli.Command{
		serveCommand(),
		alertsCommand(),
		queryCommand(),
		targetsCommand(),
		metricsCommand(),
		logsCommand(),
		labelsCommand(),
	}

	app := &cli.App{
		Name:     "lab-monitoring",
		Usage:    "MCP server for Prometheus and Alertmanager monitoring",
		Version:  version,
		Flags:    globalFlags,
		Commands: subcommands,
	}

	err := app.Run(os.Args)
	if err == nil {
		return
	}
	log.Fatal(err)
}
// serveCommand describes the "serve" subcommand, which runs the MCP server
// over either stdio or HTTP. The flags cover transport selection, the HTTP
// listener settings and the opt-in silence-creation tool; the work itself is
// done by runServe.
func serveCommand() *cli.Command {
	serveFlags := []cli.Flag{
		&cli.StringFlag{
			Name:    "transport",
			Aliases: []string{"t"},
			Usage:   "Transport type: 'stdio' or 'http'",
			Value:   "stdio",
		},
		&cli.StringFlag{
			Name:  "http-address",
			Usage: "HTTP listen address",
			Value: "127.0.0.1:8084",
		},
		&cli.StringFlag{
			Name:  "http-endpoint",
			Usage: "HTTP endpoint path",
			Value: "/mcp",
		},
		&cli.StringSliceFlag{
			Name:  "allowed-origins",
			Usage: "Allowed Origin headers for CORS",
		},
		&cli.StringFlag{
			Name:  "tls-cert",
			Usage: "TLS certificate file",
		},
		&cli.StringFlag{
			Name:  "tls-key",
			Usage: "TLS key file",
		},
		&cli.DurationFlag{
			Name:  "session-ttl",
			Usage: "Session TTL for HTTP transport",
			Value: 30 * time.Minute,
		},
		&cli.BoolFlag{
			Name:  "enable-silences",
			Usage: "Enable the create_silence tool (write operation, disabled by default)",
		},
	}

	return &cli.Command{
		Name:   "serve",
		Usage:  "Run MCP server for lab monitoring",
		Flags:  serveFlags,
		Action: runServe,
	}
}
// alertsCommand describes the "alerts" subcommand, which lists Alertmanager
// alerts and can narrow them by state and by severity label. Execution is
// delegated to runAlerts.
func alertsCommand() *cli.Command {
	stateFlag := &cli.StringFlag{
		Name:  "state",
		Usage: "Filter by state: active, suppressed, unprocessed",
	}
	severityFlag := &cli.StringFlag{
		Name:  "severity",
		Usage: "Filter by severity label",
	}

	return &cli.Command{
		Name:   "alerts",
		Usage:  "List alerts from Alertmanager",
		Flags:  []cli.Flag{stateFlag, severityFlag},
		Action: runAlerts,
	}
}
// queryCommand describes the "query" subcommand. It takes the PromQL
// expression as its first positional argument and hands it to runQuery; with
// no argument the command fails.
func queryCommand() *cli.Command {
	execute := func(c *cli.Context) error {
		if c.NArg() >= 1 {
			return runQuery(c, c.Args().First())
		}
		return fmt.Errorf("promql expression required")
	}

	return &cli.Command{
		Name:      "query",
		Usage:     "Execute an instant PromQL query",
		ArgsUsage: "<promql>",
		Action:    execute,
	}
}
// targetsCommand describes the "targets" subcommand, which lists scrape
// targets via runTargets. It has no flags of its own.
func targetsCommand() *cli.Command {
	cmd := &cli.Command{
		Name:   "targets",
		Usage:  "List scrape targets",
		Action: runTargets,
	}
	return cmd
}
// metricsCommand describes the "metrics" subcommand. The optional first
// positional argument is a search term passed to runMetrics; when it is
// omitted an empty term is passed.
func metricsCommand() *cli.Command {
	limitFlag := &cli.IntFlag{
		Name:    "limit",
		Aliases: []string{"n"},
		Usage:   "Maximum number of results",
		Value:   50,
	}

	search := func(c *cli.Context) error {
		var term string
		if c.Args().Present() {
			term = c.Args().First()
		}
		return runMetrics(c, term)
	}

	return &cli.Command{
		Name:      "metrics",
		Usage:     "Search metric names",
		ArgsUsage: "<search>",
		Flags:     []cli.Flag{limitFlag},
		Action:    search,
	}
}
// runServe implements the "serve" subcommand: it builds the backend clients,
// registers the monitoring handlers on a generic MCP server and then serves
// over the transport selected with --transport.
//
// The context handed to the transport is cancelled on SIGINT or SIGTERM.
// An unrecognised transport name yields an error.
func runServe(c *cli.Context) error {
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	logger := log.New(os.Stderr, "[mcp] ", log.LstdFlags)
	config := mcp.DefaultMonitoringConfig()

	prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))
	am := monitoring.NewAlertmanagerClient(c.String("alertmanager-url"))

	// The Loki client stays nil unless a URL was configured.
	var loki *monitoring.LokiClient
	if u := c.String("loki-url"); u != "" {
		loki = monitoring.NewLokiClient(u)
	}

	// Must be set before the config is handed to the server constructor.
	config.InstructionsFunc = func() string {
		return monitoring.AlertSummary(am)
	}

	server := mcp.NewGenericServer(logger, config)
	monitoring.RegisterHandlers(server, prom, am, loki, monitoring.HandlerOptions{
		EnableSilences: c.Bool("enable-silences"),
	})

	transport := c.String("transport")
	if transport == "stdio" {
		logger.Println("Starting lab-monitoring MCP server on stdio...")
		return server.Run(ctx, os.Stdin, os.Stdout)
	}
	if transport == "http" {
		return mcp.NewHTTPTransport(server, mcp.HTTPConfig{
			Address:        c.String("http-address"),
			Endpoint:       c.String("http-endpoint"),
			AllowedOrigins: c.StringSlice("allowed-origins"),
			SessionTTL:     c.Duration("session-ttl"),
			TLSCertFile:    c.String("tls-cert"),
			TLSKeyFile:     c.String("tls-key"),
		}).Run(ctx)
	}
	return fmt.Errorf("unknown transport: %s (use 'stdio' or 'http')", transport)
}
// runAlerts implements the "alerts" subcommand: it translates the --state and
// --severity flags into Alertmanager filters, fetches the matching alerts and
// prints one line per alert followed by its annotations.
//
// Accepted --state values are "active", "suppressed" and "unprocessed"; an
// empty value applies no state filter. Any other value is rejected with an
// error instead of being silently ignored (which would list every alert).
func runAlerts(c *cli.Context) error {
	ctx := context.Background()
	am := monitoring.NewAlertmanagerClient(c.String("alertmanager-url"))

	filters := monitoring.AlertFilters{}
	switch state := c.String("state"); state {
	case "":
		// No state filter requested.
	case "active":
		// Active only: explicitly exclude silenced and inhibited alerts.
		active, silenced, inhibited := true, false, false
		filters.Active = &active
		filters.Silenced = &silenced
		filters.Inhibited = &inhibited
	case "suppressed":
		active := false
		filters.Active = &active
	case "unprocessed":
		unprocessed := true
		filters.Unprocessed = &unprocessed
	default:
		return fmt.Errorf("invalid state %q (use 'active', 'suppressed' or 'unprocessed')", state)
	}

	if severity := c.String("severity"); severity != "" {
		filters.Filter = append(filters.Filter, fmt.Sprintf(`severity="%s"`, severity))
	}

	alerts, err := am.ListAlerts(ctx, filters)
	if err != nil {
		return fmt.Errorf("failed to list alerts: %w", err)
	}
	if len(alerts) == 0 {
		fmt.Println("No alerts found.")
		return nil
	}

	for _, a := range alerts {
		fmt.Printf("[%s] %s (severity=%s, fingerprint=%s)\n",
			a.Status.State, a.Labels["alertname"], a.Labels["severity"], a.Fingerprint)
		// NOTE(review): if Annotations is a map, the order of these lines
		// varies between runs — confirm and sort if stable output matters.
		for k, v := range a.Annotations {
			fmt.Printf(" %s: %s\n", k, v)
		}
	}
	return nil
}
// runQuery implements the "query" subcommand: it runs promql as an instant
// query (passing the zero time.Time as the evaluation time) and prints each
// result as "{label="value", ...} sample".
//
// The sample is taken from the second element of the result's Value when that
// element is a string; otherwise it is printed empty.
func runQuery(c *cli.Context, promql string) error {
	client := monitoring.NewPrometheusClient(c.String("prometheus-url"))

	data, err := client.Query(context.Background(), promql, time.Time{})
	if err != nil {
		return fmt.Errorf("query failed: %w", err)
	}

	for _, res := range data.Result {
		// Join label pairs with ", "; the separator is empty for the first pair.
		labelText, sep := "", ""
		for name, val := range res.Metric {
			labelText += sep + fmt.Sprintf("%s=%q", name, val)
			sep = ", "
		}

		var sample string
		if len(res.Value) >= 2 {
			// A failed assertion leaves sample as the empty string.
			sample, _ = res.Value[1].(string)
		}

		fmt.Printf("{%s} %s\n", labelText, sample)
	}
	return nil
}
// runTargets implements the "targets" subcommand: it fetches the scrape
// targets from Prometheus and prints one line per active target (health,
// job/instance, last scrape time and duration), followed by the target's last
// error when it has one.
func runTargets(c *cli.Context) error {
	client := monitoring.NewPrometheusClient(c.String("prometheus-url"))

	data, err := client.Targets(context.Background())
	if err != nil {
		return fmt.Errorf("failed to fetch targets: %w", err)
	}

	active := data.ActiveTargets
	if len(active) == 0 {
		fmt.Println("No active targets.")
		return nil
	}

	for _, target := range active {
		scrapedAt := target.LastScrape.Format("15:04:05")
		fmt.Printf("[%s] %s/%s (last scrape: %s, duration: %.3fs)\n",
			target.Health,
			target.Labels["job"],
			target.Labels["instance"],
			scrapedAt,
			target.LastScrapeDuration)

		if target.LastError == "" {
			continue
		}
		fmt.Printf(" error: %s\n", target.LastError)
	}
	return nil
}
// runMetrics implements the "metrics" subcommand: it fetches all metric names
// (the values of the __name__ label) and prints those containing query,
// compared case-insensitively. An empty query matches every name.
//
// At most --limit names are printed. The truncation notice is emitted only
// when a further match actually exists beyond the limit, so a result set of
// exactly --limit names is not reported as truncated. A non-positive --limit
// disables truncation.
func runMetrics(c *cli.Context, query string) error {
	ctx := context.Background()
	prom := monitoring.NewPrometheusClient(c.String("prometheus-url"))

	names, err := prom.LabelValues(ctx, "__name__")
	if err != nil {
		return fmt.Errorf("failed to fetch metric names: %w", err)
	}

	limit := c.Int("limit")
	shown := 0
	for _, name := range names {
		if query != "" && !containsIgnoreCase(name, query) {
			continue
		}
		// Check the limit before printing: reaching this point with the
		// limit already used up proves there is at least one more match.
		if limit > 0 && shown >= limit {
			fmt.Printf("... (showing %d of matching metrics, use --limit to see more)\n", limit)
			break
		}
		fmt.Println(name)
		shown++
	}

	if shown == 0 {
		if query == "" {
			fmt.Println("No metrics found.")
		} else {
			fmt.Printf("No metrics found matching '%s'\n", query)
		}
	}
	return nil
}
// logsCommand describes the "logs" subcommand. The LogQL expression is the
// first positional argument; the flags select the time window, the maximum
// number of entries and the sort direction. Execution is delegated to runLogs,
// and the command fails when no expression is given.
func logsCommand() *cli.Command {
	logFlags := []cli.Flag{
		&cli.StringFlag{
			Name:  "start",
			Usage: "Start time: relative duration (e.g., '1h'), RFC3339, or Unix epoch",
			Value: "1h",
		},
		&cli.StringFlag{
			Name:  "end",
			Usage: "End time: relative duration, RFC3339, or Unix epoch",
			Value: "now",
		},
		&cli.IntFlag{
			Name:    "limit",
			Aliases: []string{"n"},
			Usage:   "Maximum number of entries",
			Value:   100,
		},
		&cli.StringFlag{
			Name:  "direction",
			Usage: "Sort order: 'backward' (newest first) or 'forward' (oldest first)",
			Value: "backward",
		},
	}

	execute := func(c *cli.Context) error {
		if c.NArg() >= 1 {
			return runLogs(c, c.Args().First())
		}
		return fmt.Errorf("LogQL expression required")
	}

	return &cli.Command{
		Name:      "logs",
		Usage:     "Query logs from Loki using LogQL",
		ArgsUsage: "<logql>",
		Flags:     logFlags,
		Action:    execute,
	}
}
// labelsCommand describes the "labels" subcommand, which lists Loki label
// names or, when --values is given, the values of that label. Execution is
// delegated to runLabels.
func labelsCommand() *cli.Command {
	valuesFlag := &cli.StringFlag{
		Name:  "values",
		Usage: "Get values for this label name instead of listing labels",
	}

	return &cli.Command{
		Name:   "labels",
		Usage:  "List labels from Loki, or values for a specific label",
		Flags:  []cli.Flag{valuesFlag},
		Action: runLabels,
	}
}
// runLogs implements the "logs" subcommand: it runs logql as a Loki range
// query over the window given by --start/--end and prints the entries grouped
// by stream, each group headed by the stream's labels.
//
// It fails when no Loki URL is configured, when either time flag cannot be
// parsed, or when the query itself fails.
func runLogs(c *cli.Context, logql string) error {
	lokiURL := c.String("loki-url")
	if lokiURL == "" {
		return fmt.Errorf("--loki-url or LOKI_URL is required for log queries")
	}

	ctx := context.Background()
	client := monitoring.NewLokiClient(lokiURL)

	// A single reference instant supplies both fallback values.
	now := time.Now()
	start, err := parseCLITime(c.String("start"), now.Add(-time.Hour))
	if err != nil {
		return fmt.Errorf("invalid start time: %w", err)
	}
	end, err := parseCLITime(c.String("end"), now)
	if err != nil {
		return fmt.Errorf("invalid end time: %w", err)
	}

	data, err := client.QueryRange(ctx, logql, start, end, c.Int("limit"), c.String("direction"))
	if err != nil {
		return fmt.Errorf("log query failed: %w", err)
	}

	// Streams may be present yet empty, so count entries rather than streams.
	entryCount := 0
	for _, s := range data.Result {
		entryCount += len(s.Values)
	}
	if entryCount == 0 {
		fmt.Println("No log entries found.")
		return nil
	}

	for _, s := range data.Result {
		// Join label pairs with ", "; the separator is empty for the first pair.
		header, sep := "", ""
		for name, val := range s.Stream {
			header += sep + fmt.Sprintf("%s=%q", name, val)
			sep = ", "
		}
		fmt.Printf("--- {%s} ---\n", header)

		// Each entry is indexed as [timestamp, line].
		for _, entry := range s.Values {
			fmt.Printf("[%s] %s\n", formatCLITimestamp(entry[0]), entry[1])
		}
		fmt.Println()
	}
	return nil
}
// runLabels implements the "labels" subcommand. Without --values it prints
// every Loki label name, one per line; with --values it prints the values of
// the named label instead. It fails when no Loki URL is configured.
func runLabels(c *cli.Context) error {
	lokiURL := c.String("loki-url")
	if lokiURL == "" {
		return fmt.Errorf("--loki-url or LOKI_URL is required for label queries")
	}

	ctx := context.Background()
	client := monitoring.NewLokiClient(lokiURL)

	name := c.String("values")
	if name == "" {
		// No specific label requested: list the label names.
		all, err := client.Labels(ctx)
		if err != nil {
			return fmt.Errorf("failed to list labels: %w", err)
		}
		if len(all) == 0 {
			fmt.Println("No labels found.")
			return nil
		}
		for _, l := range all {
			fmt.Println(l)
		}
		return nil
	}

	vals, err := client.LabelValues(ctx, name)
	if err != nil {
		return fmt.Errorf("failed to list label values: %w", err)
	}
	if len(vals) == 0 {
		fmt.Printf("No values found for label '%s'.\n", name)
		return nil
	}
	for _, v := range vals {
		fmt.Println(v)
	}
	return nil
}
// parseCLITime parses a time specification given on the command line.
//
// Accepted forms, tried in this order:
//   - "now" or the empty string: the current time
//   - a Go duration such as "1h" or "30m": that long before the current time
//   - an RFC3339 timestamp
//   - a string of decimal digits: Unix epoch seconds
//
// If s matches none of these, defaultTime is returned together with a non-nil
// error. Epoch values that do not fit in an int64 are rejected rather than
// silently wrapped.
func parseCLITime(s string, defaultTime time.Time) (time.Time, error) {
	if s == "" || s == "now" {
		return time.Now(), nil
	}

	// Relative duration, interpreted as "this long ago".
	if d, err := time.ParseDuration(s); err == nil {
		return time.Now().Add(-d), nil
	}

	if t, err := time.Parse(time.RFC3339, s); err == nil {
		return t, nil
	}

	// Unix epoch seconds. ParseUint in base 10 accepts plain digits only (no
	// sign), and the 63-bit size bounds the value to the non-negative int64
	// range, so the conversion below cannot overflow.
	if epoch, err := strconv.ParseUint(s, 10, 63); err == nil {
		return time.Unix(int64(epoch), 0), nil
	}

	return defaultTime, fmt.Errorf("cannot parse time '%s'", s)
}
// formatCLITimestamp renders a Unix timestamp in nanoseconds, given as a
// decimal string, as local time in "2006-01-02 15:04:05" layout.
//
// If nsStr is not a valid 64-bit decimal integer it is returned unchanged, so
// malformed input stays visible in the output instead of being turned into a
// misleading date.
func formatCLITimestamp(nsStr string) string {
	ns, err := strconv.ParseInt(nsStr, 10, 64)
	if err != nil {
		return nsStr
	}
	return time.Unix(0, ns).Local().Format("2006-01-02 15:04:05")
}
// containsIgnoreCase reports whether substr occurs within s, comparing byte by
// byte with ASCII letters folded to lower case. Bytes outside 'A'..'Z' are
// compared verbatim, so non-ASCII text must match exactly. An empty substr
// matches any s.
//
// Both strings are indexed by byte. Ranging over a string visits only
// rune-start offsets, which would skip the continuation bytes of multi-byte
// UTF-8 characters and let different non-ASCII characters compare equal.
func containsIgnoreCase(s, substr string) bool {
	fold := func(b byte) byte {
		if 'A' <= b && b <= 'Z' {
			return b + ('a' - 'A')
		}
		return b
	}

	// last is negative when substr is longer than s; the loop then never runs.
	last := len(s) - len(substr)
	for start := 0; start <= last; start++ {
		k := 0
		for k < len(substr) && fold(s[start+k]) == fold(substr[k]) {
			k++
		}
		if k == len(substr) {
			return true
		}
	}
	return false
}