feat: add lab-monitoring MCP server for Prometheus and Alertmanager

New MCP server that queries live Prometheus and Alertmanager HTTP APIs
with 8 tools: list_alerts, get_alert, search_metrics, get_metric_metadata,
query (PromQL), list_targets, list_silences, and create_silence.

Extends the MCP core with ModeCustom and NewGenericServer for servers
that don't require a database. Includes CLI with direct commands
(alerts, query, targets, metrics), NixOS module, and comprehensive
httptest-based tests.

Bumps existing binaries to 0.2.1 due to shared internal/mcp change.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-04 23:11:53 +01:00
parent 0bd4ed778a
commit 1755364bba
19 changed files with 2567 additions and 22 deletions

View File

@@ -0,0 +1,153 @@
package monitoring
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
)
// AlertmanagerClient is an HTTP client for the Alertmanager API v2.
type AlertmanagerClient struct {
	baseURL    string       // base URL without trailing slash (normalized in NewAlertmanagerClient)
	httpClient *http.Client // shared client; configured with a 30s timeout
}
// NewAlertmanagerClient creates a new Alertmanager API client.
// The base URL is normalized by stripping trailing slashes, and all requests
// share a single HTTP client with a 30-second timeout.
func NewAlertmanagerClient(baseURL string) *AlertmanagerClient {
	trimmed := strings.TrimRight(baseURL, "/")
	httpClient := &http.Client{Timeout: 30 * time.Second}
	return &AlertmanagerClient{baseURL: trimmed, httpClient: httpClient}
}
// ListAlerts returns alerts matching the given filters.
// Boolean filters are only sent when explicitly set; label filter expressions
// are passed through as repeated "filter" query parameters.
func (c *AlertmanagerClient) ListAlerts(ctx context.Context, filters AlertFilters) ([]Alert, error) {
	params := url.Values{}
	// Optional tri-state booleans: omitted entirely when nil.
	// (url.Values.Encode sorts keys, so map iteration order is irrelevant.)
	for name, val := range map[string]*bool{
		"active":      filters.Active,
		"silenced":    filters.Silenced,
		"inhibited":   filters.Inhibited,
		"unprocessed": filters.Unprocessed,
	} {
		if val != nil {
			params.Set(name, fmt.Sprintf("%t", *val))
		}
	}
	if filters.Receiver != "" {
		params.Set("receiver", filters.Receiver)
	}
	for _, f := range filters.Filter {
		params.Add("filter", f)
	}
	endpoint := c.baseURL + "/api/v2/alerts"
	if encoded := params.Encode(); encoded != "" {
		endpoint += "?" + encoded
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck // cleanup on exit
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
	}
	var alerts []Alert
	if err := json.Unmarshal(body, &alerts); err != nil {
		return nil, fmt.Errorf("failed to parse alerts: %w", err)
	}
	return alerts, nil
}
// ListSilences returns all silences.
// This includes expired silences; callers filter by state if needed.
func (c *AlertmanagerClient) ListSilences(ctx context.Context) ([]Silence, error) {
	endpoint := c.baseURL + "/api/v2/silences"
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck // cleanup on exit
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
	}
	var silences []Silence
	if err := json.Unmarshal(body, &silences); err != nil {
		return nil, fmt.Errorf("failed to parse silences: %w", err)
	}
	return silences, nil
}
// CreateSilence creates a new silence and returns the silence ID.
func (c *AlertmanagerClient) CreateSilence(ctx context.Context, silence Silence) (string, error) {
	payload, err := json.Marshal(silence)
	if err != nil {
		return "", fmt.Errorf("failed to marshal silence: %w", err)
	}
	endpoint := c.baseURL + "/api/v2/silences"
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck // cleanup on exit
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
	}
	// Alertmanager responds with {"silenceID": "..."} on success.
	var created struct {
		SilenceID string `json:"silenceID"`
	}
	if err := json.Unmarshal(body, &created); err != nil {
		return "", fmt.Errorf("failed to parse response: %w", err)
	}
	return created.SilenceID, nil
}

View File

@@ -0,0 +1,175 @@
package monitoring
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
)
// TestAlertmanagerClient_ListAlerts verifies that ListAlerts hits the
// /api/v2/alerts path and decodes fingerprint, labels, and status fields
// from a canned API v2 JSON response.
func TestAlertmanagerClient_ListAlerts(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/v2/alerts" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		w.Header().Set("Content-Type", "application/json")
		// Single alert in Alertmanager API v2 shape.
		_, _ = w.Write([]byte(`[
	{
	"annotations": {"summary": "Target is down"},
	"endsAt": "2024-01-01T01:00:00Z",
	"fingerprint": "abc123",
	"receivers": [{"name": "default"}],
	"startsAt": "2024-01-01T00:00:00Z",
	"status": {"inhibitedBy": [], "silencedBy": [], "state": "active"},
	"updatedAt": "2024-01-01T00:00:00Z",
	"generatorURL": "http://prometheus:9090/graph",
	"labels": {"alertname": "TargetDown", "severity": "critical", "instance": "node1:9100"}
	}
	]`))
	}))
	defer srv.Close()
	client := NewAlertmanagerClient(srv.URL)
	alerts, err := client.ListAlerts(context.Background(), AlertFilters{})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(alerts) != 1 {
		t.Fatalf("expected 1 alert, got %d", len(alerts))
	}
	// Spot-check the decoded fields against the fixture above.
	if alerts[0].Fingerprint != "abc123" {
		t.Errorf("expected fingerprint=abc123, got %s", alerts[0].Fingerprint)
	}
	if alerts[0].Labels["alertname"] != "TargetDown" {
		t.Errorf("expected alertname=TargetDown, got %s", alerts[0].Labels["alertname"])
	}
	if alerts[0].Status.State != "active" {
		t.Errorf("expected state=active, got %s", alerts[0].Status.State)
	}
}
func TestAlertmanagerClient_ListAlertsWithFilters(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
q := r.URL.Query()
if q.Get("active") != "true" {
t.Errorf("expected active=true, got %s", q.Get("active"))
}
if q.Get("silenced") != "false" {
t.Errorf("expected silenced=false, got %s", q.Get("silenced"))
}
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`[]`))
}))
defer srv.Close()
client := NewAlertmanagerClient(srv.URL)
active := true
silenced := false
_, err := client.ListAlerts(context.Background(), AlertFilters{
Active: &active,
Silenced: &silenced,
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
// TestAlertmanagerClient_ListSilences verifies that ListSilences hits the
// /api/v2/silences path and decodes ID and CreatedBy from the response.
func TestAlertmanagerClient_ListSilences(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/v2/silences" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		w.Header().Set("Content-Type", "application/json")
		// Single silence in Alertmanager API v2 shape.
		_, _ = w.Write([]byte(`[
	{
	"id": "silence-1",
	"matchers": [{"name": "alertname", "value": "TargetDown", "isRegex": false}],
	"startsAt": "2024-01-01T00:00:00Z",
	"endsAt": "2024-01-01T02:00:00Z",
	"createdBy": "admin",
	"comment": "Maintenance window",
	"status": {"state": "active"}
	}
	]`))
	}))
	defer srv.Close()
	client := NewAlertmanagerClient(srv.URL)
	silences, err := client.ListSilences(context.Background())
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(silences) != 1 {
		t.Fatalf("expected 1 silence, got %d", len(silences))
	}
	if silences[0].ID != "silence-1" {
		t.Errorf("expected id=silence-1, got %s", silences[0].ID)
	}
	if silences[0].CreatedBy != "admin" {
		t.Errorf("expected createdBy=admin, got %s", silences[0].CreatedBy)
	}
}
// TestAlertmanagerClient_CreateSilence verifies that CreateSilence POSTs a
// JSON silence body to /api/v2/silences and returns the silenceID from the
// response.
func TestAlertmanagerClient_CreateSilence(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			t.Errorf("expected POST, got %s", r.Method)
		}
		if r.URL.Path != "/api/v2/silences" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		if r.Header.Get("Content-Type") != "application/json" {
			t.Errorf("expected Content-Type=application/json, got %s", r.Header.Get("Content-Type"))
		}
		// Round-trip the request body to check the marshaled payload.
		var silence Silence
		if err := json.NewDecoder(r.Body).Decode(&silence); err != nil {
			t.Fatalf("failed to decode request body: %v", err)
		}
		if silence.CreatedBy != "admin" {
			t.Errorf("expected createdBy=admin, got %s", silence.CreatedBy)
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"silenceID": "new-silence-id"}`))
	}))
	defer srv.Close()
	client := NewAlertmanagerClient(srv.URL)
	id, err := client.CreateSilence(context.Background(), Silence{
		Matchers: []Matcher{
			{Name: "alertname", Value: "TargetDown", IsRegex: false},
		},
		StartsAt:  time.Now(),
		EndsAt:    time.Now().Add(2 * time.Hour),
		CreatedBy: "admin",
		Comment:   "Test silence",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if id != "new-silence-id" {
		t.Errorf("expected id=new-silence-id, got %s", id)
	}
}
// TestAlertmanagerClient_HTTPError verifies that a non-200 response is
// surfaced to the caller as an error.
func TestAlertmanagerClient_HTTPError(t *testing.T) {
	handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte("internal error"))
	})
	srv := httptest.NewServer(handler)
	defer srv.Close()

	client := NewAlertmanagerClient(srv.URL)
	if _, err := client.ListAlerts(context.Background(), AlertFilters{}); err == nil {
		t.Fatal("expected error, got nil")
	}
}

View File

@@ -0,0 +1,317 @@
package monitoring
import (
"fmt"
"sort"
"strings"
"time"
)
const maxRows = 100
// formatInstantVector formats instant vector results as a markdown table.
func formatInstantVector(results []PromInstantVector) string {
if len(results) == 0 {
return "No results."
}
// Collect all label keys across results (excluding __name__)
labelKeys := collectLabelKeys(results)
var sb strings.Builder
// Header
sb.WriteString("| ")
if _, ok := results[0].Metric["__name__"]; ok {
sb.WriteString("Metric | ")
}
for _, key := range labelKeys {
sb.WriteString(key)
sb.WriteString(" | ")
}
sb.WriteString("Value |\n")
// Separator
sb.WriteString("| ")
if _, ok := results[0].Metric["__name__"]; ok {
sb.WriteString("--- | ")
}
for range labelKeys {
sb.WriteString("--- | ")
}
sb.WriteString("--- |\n")
// Rows
truncated := false
for i, r := range results {
if i >= maxRows {
truncated = true
break
}
sb.WriteString("| ")
if _, ok := results[0].Metric["__name__"]; ok {
sb.WriteString(r.Metric["__name__"])
sb.WriteString(" | ")
}
for _, key := range labelKeys {
sb.WriteString(r.Metric[key])
sb.WriteString(" | ")
}
// Value is at index 1 of the value tuple
if len(r.Value) >= 2 {
if v, ok := r.Value[1].(string); ok {
sb.WriteString(v)
}
}
sb.WriteString(" |\n")
}
if truncated {
sb.WriteString(fmt.Sprintf("\n*Showing %d of %d results (truncated)*\n", maxRows, len(results)))
}
return sb.String()
}
// collectLabelKeys returns sorted label keys across all results, excluding __name__.
func collectLabelKeys(results []PromInstantVector) []string {
keySet := make(map[string]struct{})
for _, r := range results {
for k := range r.Metric {
if k != "__name__" {
keySet[k] = struct{}{}
}
}
}
keys := make([]string, 0, len(keySet))
for k := range keySet {
keys = append(keys, k)
}
sort.Strings(keys)
return keys
}
// formatAlerts formats alerts as grouped markdown.
func formatAlerts(alerts []Alert) string {
if len(alerts) == 0 {
return "No alerts found."
}
// Group by alertname
groups := make(map[string][]Alert)
var order []string
for _, a := range alerts {
name := a.Labels["alertname"]
if _, exists := groups[name]; !exists {
order = append(order, name)
}
groups[name] = append(groups[name], a)
}
var sb strings.Builder
sb.WriteString(fmt.Sprintf("**%d alert(s)**\n\n", len(alerts)))
for _, name := range order {
group := groups[name]
sb.WriteString(fmt.Sprintf("## %s (%d)\n\n", name, len(group)))
for i, a := range group {
if i >= maxRows {
sb.WriteString(fmt.Sprintf("*... and %d more*\n", len(group)-maxRows))
break
}
sb.WriteString(fmt.Sprintf("**State:** %s | **Severity:** %s\n", a.Status.State, a.Labels["severity"]))
// Labels (excluding alertname and severity)
var labels []string
for k, v := range a.Labels {
if k != "alertname" && k != "severity" {
labels = append(labels, fmt.Sprintf("%s=%s", k, v))
}
}
sort.Strings(labels)
if len(labels) > 0 {
sb.WriteString(fmt.Sprintf("**Labels:** %s\n", strings.Join(labels, ", ")))
}
// Annotations
for k, v := range a.Annotations {
sb.WriteString(fmt.Sprintf("**%s:** %s\n", k, v))
}
sb.WriteString(fmt.Sprintf("**Fingerprint:** %s\n", a.Fingerprint))
sb.WriteString(fmt.Sprintf("**Started:** %s\n", a.StartsAt.Format(time.RFC3339)))
if len(a.Status.SilencedBy) > 0 {
sb.WriteString(fmt.Sprintf("**Silenced by:** %s\n", strings.Join(a.Status.SilencedBy, ", ")))
}
if len(a.Status.InhibitedBy) > 0 {
sb.WriteString(fmt.Sprintf("**Inhibited by:** %s\n", strings.Join(a.Status.InhibitedBy, ", ")))
}
sb.WriteString("\n")
}
}
return sb.String()
}
// formatTargets formats targets as grouped markdown.
func formatTargets(targets *PromTargetsData) string {
if targets == nil || len(targets.ActiveTargets) == 0 {
return "No active targets."
}
// Group by job
groups := make(map[string][]PromTarget)
var order []string
for _, t := range targets.ActiveTargets {
job := t.Labels["job"]
if _, exists := groups[job]; !exists {
order = append(order, job)
}
groups[job] = append(groups[job], t)
}
sort.Strings(order)
var sb strings.Builder
sb.WriteString(fmt.Sprintf("**%d active target(s)**\n\n", len(targets.ActiveTargets)))
// Count health statuses
healthCounts := make(map[string]int)
for _, t := range targets.ActiveTargets {
healthCounts[t.Health]++
}
var healthParts []string
for h, c := range healthCounts {
healthParts = append(healthParts, fmt.Sprintf("%s: %d", h, c))
}
sort.Strings(healthParts)
sb.WriteString(fmt.Sprintf("**Health summary:** %s\n\n", strings.Join(healthParts, ", ")))
for _, job := range order {
group := groups[job]
sb.WriteString(fmt.Sprintf("## %s (%d targets)\n\n", job, len(group)))
sb.WriteString("| Instance | Health | Last Scrape | Duration | Error |\n")
sb.WriteString("| --- | --- | --- | --- | --- |\n")
for _, t := range group {
instance := t.Labels["instance"]
lastScrape := t.LastScrape.Format("15:04:05")
duration := fmt.Sprintf("%.3fs", t.LastScrapeDuration)
lastErr := t.LastError
if lastErr == "" {
lastErr = "-"
}
sb.WriteString(fmt.Sprintf("| %s | %s | %s | %s | %s |\n",
instance, t.Health, lastScrape, duration, lastErr))
}
sb.WriteString("\n")
}
return sb.String()
}
// formatSilences formats silences as markdown.
func formatSilences(silences []Silence) string {
if len(silences) == 0 {
return "No silences found."
}
var sb strings.Builder
sb.WriteString(fmt.Sprintf("**%d silence(s)**\n\n", len(silences)))
for _, s := range silences {
state := "unknown"
if s.Status != nil {
state = s.Status.State
}
sb.WriteString(fmt.Sprintf("## Silence %s [%s]\n\n", s.ID, state))
// Matchers
var matchers []string
for _, m := range s.Matchers {
op := "="
if m.IsRegex {
op = "=~"
}
if m.IsEqual != nil && !*m.IsEqual {
if m.IsRegex {
op = "!~"
} else {
op = "!="
}
}
matchers = append(matchers, fmt.Sprintf("%s%s%s", m.Name, op, m.Value))
}
sb.WriteString(fmt.Sprintf("**Matchers:** %s\n", strings.Join(matchers, ", ")))
sb.WriteString(fmt.Sprintf("**Created by:** %s\n", s.CreatedBy))
sb.WriteString(fmt.Sprintf("**Comment:** %s\n", s.Comment))
sb.WriteString(fmt.Sprintf("**Starts:** %s\n", s.StartsAt.Format(time.RFC3339)))
sb.WriteString(fmt.Sprintf("**Ends:** %s\n", s.EndsAt.Format(time.RFC3339)))
sb.WriteString("\n")
}
return sb.String()
}
// formatMetricSearch formats metric search results.
func formatMetricSearch(names []string, metadata map[string][]PromMetadata) string {
if len(names) == 0 {
return "No metrics found matching the search."
}
var sb strings.Builder
sb.WriteString(fmt.Sprintf("**%d metric(s) found**\n\n", len(names)))
sb.WriteString("| Metric | Type | Help |\n")
sb.WriteString("| --- | --- | --- |\n")
truncated := false
for i, name := range names {
if i >= maxRows {
truncated = true
break
}
metaType := ""
help := ""
if metas, ok := metadata[name]; ok && len(metas) > 0 {
metaType = metas[0].Type
help = metas[0].Help
if len(help) > 100 {
help = help[:100] + "..."
}
}
sb.WriteString(fmt.Sprintf("| %s | %s | %s |\n", name, metaType, help))
}
if truncated {
sb.WriteString(fmt.Sprintf("\n*Showing %d of %d metrics (truncated)*\n", maxRows, len(names)))
}
return sb.String()
}
// formatMetricMetadata formats metadata for a single metric.
func formatMetricMetadata(name string, metas []PromMetadata) string {
if len(metas) == 0 {
return fmt.Sprintf("No metadata found for metric '%s'.", name)
}
var sb strings.Builder
sb.WriteString(fmt.Sprintf("# %s\n\n", name))
for _, m := range metas {
sb.WriteString(fmt.Sprintf("**Type:** %s\n", m.Type))
if m.Help != "" {
sb.WriteString(fmt.Sprintf("**Help:** %s\n", m.Help))
}
if m.Unit != "" {
sb.WriteString(fmt.Sprintf("**Unit:** %s\n", m.Unit))
}
}
return sb.String()
}

View File

@@ -0,0 +1,434 @@
package monitoring
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"git.t-juice.club/torjus/labmcp/internal/mcp"
)
// RegisterHandlers registers all monitoring tool handlers on the MCP server.
func RegisterHandlers(server *mcp.Server, prom *PrometheusClient, am *AlertmanagerClient) {
server.RegisterTool(listAlertsTool(), makeListAlertsHandler(am))
server.RegisterTool(getAlertTool(), makeGetAlertHandler(am))
server.RegisterTool(searchMetricsTool(), makeSearchMetricsHandler(prom))
server.RegisterTool(getMetricMetadataTool(), makeGetMetricMetadataHandler(prom))
server.RegisterTool(queryTool(), makeQueryHandler(prom))
server.RegisterTool(listTargetsTool(), makeListTargetsHandler(prom))
server.RegisterTool(listSilencesTool(), makeListSilencesHandler(am))
server.RegisterTool(createSilenceTool(), makeCreateSilenceHandler(am))
}
// Tool definitions
// listAlertsTool describes the list_alerts tool. All parameters are
// optional; they map onto Alertmanager query parameters in
// makeListAlertsHandler.
func listAlertsTool() mcp.Tool {
	return mcp.Tool{
		Name:        "list_alerts",
		Description: "List alerts from Alertmanager with optional filters",
		InputSchema: mcp.InputSchema{
			Type: "object",
			Properties: map[string]mcp.Property{
				"state": {
					Type:        "string",
					Description: "Filter by alert state: 'active', 'suppressed', or 'unprocessed'",
					Enum:        []string{"active", "suppressed", "unprocessed"},
				},
				"severity": {
					Type:        "string",
					Description: "Filter by severity label (e.g., 'critical', 'warning')",
				},
				"receiver": {
					Type:        "string",
					Description: "Filter by receiver name",
				},
			},
		},
	}
}
// getAlertTool describes the get_alert tool; the fingerprint argument is
// required and matched against Alert.Fingerprint in makeGetAlertHandler.
func getAlertTool() mcp.Tool {
	return mcp.Tool{
		Name:        "get_alert",
		Description: "Get full details for a specific alert by fingerprint",
		InputSchema: mcp.InputSchema{
			Type: "object",
			Properties: map[string]mcp.Property{
				"fingerprint": {
					Type:        "string",
					Description: "Alert fingerprint identifier",
				},
			},
			Required: []string{"fingerprint"},
		},
	}
}
// searchMetricsTool describes the search_metrics tool. The query is an
// optional case-insensitive substring; limit defaults to 50 in the handler.
func searchMetricsTool() mcp.Tool {
	return mcp.Tool{
		Name:        "search_metrics",
		Description: "Search Prometheus metric names with optional substring filter, enriched with metadata (type, help text)",
		InputSchema: mcp.InputSchema{
			Type: "object",
			Properties: map[string]mcp.Property{
				"query": {
					Type:        "string",
					Description: "Substring to filter metric names (e.g., 'cpu', 'memory', 'node_'). Empty returns all metrics.",
				},
				"limit": {
					Type:        "integer",
					Description: "Maximum number of results (default: 50)",
					Default:     50,
				},
			},
		},
	}
}
// getMetricMetadataTool describes the get_metric_metadata tool; the metric
// name argument is required.
func getMetricMetadataTool() mcp.Tool {
	return mcp.Tool{
		Name:        "get_metric_metadata",
		Description: "Get type, help text, and unit for a specific Prometheus metric",
		InputSchema: mcp.InputSchema{
			Type: "object",
			Properties: map[string]mcp.Property{
				"metric": {
					Type:        "string",
					Description: "Metric name (e.g., 'node_cpu_seconds_total')",
				},
			},
			Required: []string{"metric"},
		},
	}
}
// queryTool describes the query tool, which evaluates an instant PromQL
// expression (handled by makeQueryHandler at the current server time).
func queryTool() mcp.Tool {
	return mcp.Tool{
		Name:        "query",
		Description: "Execute an instant PromQL query against Prometheus. Supports aggregations like avg_over_time(metric[1h]), rate(), sum(), etc.",
		InputSchema: mcp.InputSchema{
			Type: "object",
			Properties: map[string]mcp.Property{
				"promql": {
					Type:        "string",
					Description: "PromQL expression to evaluate (e.g., 'up', 'rate(http_requests_total[5m])', 'avg_over_time(node_load1[1h])')",
				},
			},
			Required: []string{"promql"},
		},
	}
}
// listTargetsTool describes the list_targets tool; it takes no arguments.
func listTargetsTool() mcp.Tool {
	return mcp.Tool{
		Name:        "list_targets",
		Description: "List Prometheus scrape targets with health status, grouped by job",
		InputSchema: mcp.InputSchema{
			Type:       "object",
			Properties: map[string]mcp.Property{},
		},
	}
}
// listSilencesTool describes the list_silences tool; it takes no arguments.
// Expired silences are filtered out by makeListSilencesHandler.
func listSilencesTool() mcp.Tool {
	return mcp.Tool{
		Name:        "list_silences",
		Description: "List active and pending alert silences from Alertmanager",
		InputSchema: mcp.InputSchema{
			Type:       "object",
			Properties: map[string]mcp.Property{},
		},
	}
}
// createSilenceTool describes the create_silence tool. All four arguments
// are required; matchers is a JSON-encoded array parsed by the handler and
// duration uses Go's time.ParseDuration syntax.
func createSilenceTool() mcp.Tool {
	return mcp.Tool{
		Name:        "create_silence",
		Description: `Create a new silence in Alertmanager. IMPORTANT: Always confirm with the user before creating a silence, showing them the matchers, duration, and reason.`,
		InputSchema: mcp.InputSchema{
			Type: "object",
			Properties: map[string]mcp.Property{
				"matchers": {
					Type:        "string",
					Description: `JSON array of matchers, e.g. [{"name":"alertname","value":"TargetDown","isRegex":false}]`,
				},
				"duration": {
					Type:        "string",
					Description: "Silence duration in Go duration format (e.g., '2h', '30m', '1h30m')",
				},
				"author": {
					Type:        "string",
					Description: "Author of the silence",
				},
				"comment": {
					Type:        "string",
					Description: "Reason for the silence",
				},
			},
			Required: []string{"matchers", "duration", "author", "comment"},
		},
	}
}
// Handler constructors
// makeListAlertsHandler returns the handler for the list_alerts tool.
// The state argument expands into Alertmanager boolean filters; severity
// becomes a label filter expression; receiver passes through directly.
func makeListAlertsHandler(am *AlertmanagerClient) mcp.ToolHandler {
	return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
		boolPtr := func(v bool) *bool { return &v }
		var filters AlertFilters
		if state, ok := args["state"].(string); ok {
			switch state {
			case "active":
				// "active" means firing and neither silenced nor inhibited.
				filters.Active = boolPtr(true)
				filters.Silenced = boolPtr(false)
				filters.Inhibited = boolPtr(false)
			case "suppressed":
				filters.Active = boolPtr(false)
			case "unprocessed":
				filters.Unprocessed = boolPtr(true)
			}
		}
		if severity, ok := args["severity"].(string); ok && severity != "" {
			filters.Filter = append(filters.Filter, fmt.Sprintf(`severity="%s"`, severity))
		}
		if receiver, ok := args["receiver"].(string); ok && receiver != "" {
			filters.Receiver = receiver
		}
		alerts, err := am.ListAlerts(ctx, filters)
		if err != nil {
			return mcp.ErrorContent(fmt.Errorf("failed to list alerts: %w", err)), nil
		}
		content := mcp.TextContent(formatAlerts(alerts))
		return mcp.CallToolResult{Content: []mcp.Content{content}}, nil
	}
}
// makeGetAlertHandler returns the handler for the get_alert tool.
func makeGetAlertHandler(am *AlertmanagerClient) mcp.ToolHandler {
	return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
		fingerprint, _ := args["fingerprint"].(string)
		if fingerprint == "" {
			return mcp.ErrorContent(fmt.Errorf("fingerprint is required")), nil
		}
		// Alertmanager has no lookup-by-fingerprint endpoint, so fetch all
		// alerts and scan for a match.
		alerts, err := am.ListAlerts(ctx, AlertFilters{})
		if err != nil {
			return mcp.ErrorContent(fmt.Errorf("failed to fetch alerts: %w", err)), nil
		}
		for i := range alerts {
			if alerts[i].Fingerprint != fingerprint {
				continue
			}
			content := mcp.TextContent(formatAlerts([]Alert{alerts[i]}))
			return mcp.CallToolResult{Content: []mcp.Content{content}}, nil
		}
		return mcp.ErrorContent(fmt.Errorf("alert with fingerprint '%s' not found", fingerprint)), nil
	}
}
// makeSearchMetricsHandler returns the handler for the search_metrics tool.
func makeSearchMetricsHandler(prom *PrometheusClient) mcp.ToolHandler {
	return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
		query, _ := args["query"].(string)
		// JSON numbers decode as float64; default the limit to 50.
		limit := 50
		if raw, ok := args["limit"].(float64); ok && raw > 0 {
			limit = int(raw)
		}
		// All metric names are the values of the reserved __name__ label.
		allNames, err := prom.LabelValues(ctx, "__name__")
		if err != nil {
			return mcp.ErrorContent(fmt.Errorf("failed to fetch metric names: %w", err)), nil
		}
		// Case-insensitive substring match; an empty query matches everything.
		needle := strings.ToLower(query)
		var matched []string
		for _, name := range allNames {
			if needle != "" && !strings.Contains(strings.ToLower(name), needle) {
				continue
			}
			matched = append(matched, name)
			if len(matched) >= limit {
				break
			}
		}
		// Metadata enriches the table but is optional; ignore fetch errors.
		metadata, err := prom.Metadata(ctx, "")
		if err != nil {
			metadata = nil
		}
		content := mcp.TextContent(formatMetricSearch(matched, metadata))
		return mcp.CallToolResult{Content: []mcp.Content{content}}, nil
	}
}
// makeGetMetricMetadataHandler returns the handler for the
// get_metric_metadata tool.
func makeGetMetricMetadataHandler(prom *PrometheusClient) mcp.ToolHandler {
	return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
		metric, _ := args["metric"].(string)
		if metric == "" {
			return mcp.ErrorContent(fmt.Errorf("metric is required")), nil
		}
		metadata, err := prom.Metadata(ctx, metric)
		if err != nil {
			return mcp.ErrorContent(fmt.Errorf("failed to fetch metadata: %w", err)), nil
		}
		// A missing key yields a nil slice, which the formatter reports as
		// "no metadata found".
		content := mcp.TextContent(formatMetricMetadata(metric, metadata[metric]))
		return mcp.CallToolResult{Content: []mcp.Content{content}}, nil
	}
}
// makeQueryHandler returns the handler for the query (instant PromQL) tool.
func makeQueryHandler(prom *PrometheusClient) mcp.ToolHandler {
	return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
		promql, _ := args["promql"].(string)
		if promql == "" {
			return mcp.ErrorContent(fmt.Errorf("promql is required")), nil
		}
		// A zero time evaluates the query at the current server time.
		data, err := prom.Query(ctx, promql, time.Time{})
		if err != nil {
			return mcp.ErrorContent(fmt.Errorf("query failed: %w", err)), nil
		}
		var text string
		switch data.ResultType {
		case "vector":
			text = formatInstantVector(data.Result)
		case "scalar":
			// Scalar results arrive as a single [timestamp, "value"] pair.
			if len(data.Result) > 0 && len(data.Result[0].Value) >= 2 {
				if v, ok := data.Result[0].Value[1].(string); ok {
					text = fmt.Sprintf("**Scalar result:** %s", v)
				}
			}
			if text == "" {
				text = "Scalar query returned no value."
			}
		default:
			text = fmt.Sprintf("Result type: %s\n\n%s", data.ResultType, formatInstantVector(data.Result))
		}
		return mcp.CallToolResult{Content: []mcp.Content{mcp.TextContent(text)}}, nil
	}
}
// makeListTargetsHandler returns the handler for the list_targets tool,
// which takes no arguments.
func makeListTargetsHandler(prom *PrometheusClient) mcp.ToolHandler {
	return func(ctx context.Context, _ map[string]interface{}) (mcp.CallToolResult, error) {
		targets, err := prom.Targets(ctx)
		if err != nil {
			return mcp.ErrorContent(fmt.Errorf("failed to fetch targets: %w", err)), nil
		}
		content := mcp.TextContent(formatTargets(targets))
		return mcp.CallToolResult{Content: []mcp.Content{content}}, nil
	}
}
// makeListSilencesHandler returns the handler for the list_silences tool.
// Only active and pending silences are shown; expired ones are dropped.
func makeListSilencesHandler(am *AlertmanagerClient) mcp.ToolHandler {
	return func(ctx context.Context, _ map[string]interface{}) (mcp.CallToolResult, error) {
		all, err := am.ListSilences(ctx)
		if err != nil {
			return mcp.ErrorContent(fmt.Errorf("failed to fetch silences: %w", err)), nil
		}
		var relevant []Silence
		for _, s := range all {
			if s.Status == nil {
				continue
			}
			switch s.Status.State {
			case "active", "pending":
				relevant = append(relevant, s)
			}
		}
		content := mcp.TextContent(formatSilences(relevant))
		return mcp.CallToolResult{Content: []mcp.Content{content}}, nil
	}
}
// makeCreateSilenceHandler returns the handler for the create_silence tool.
// It validates the four required arguments, parses the matcher JSON and Go
// duration, then POSTs a silence starting now to Alertmanager and reports
// the new silence ID and expiry.
func makeCreateSilenceHandler(am *AlertmanagerClient) mcp.ToolHandler {
	return func(ctx context.Context, args map[string]interface{}) (mcp.CallToolResult, error) {
		matchersJSON, _ := args["matchers"].(string)
		if matchersJSON == "" {
			return mcp.ErrorContent(fmt.Errorf("matchers is required")), nil
		}
		durationStr, _ := args["duration"].(string)
		if durationStr == "" {
			return mcp.ErrorContent(fmt.Errorf("duration is required")), nil
		}
		author, _ := args["author"].(string)
		if author == "" {
			return mcp.ErrorContent(fmt.Errorf("author is required")), nil
		}
		comment, _ := args["comment"].(string)
		if comment == "" {
			return mcp.ErrorContent(fmt.Errorf("comment is required")), nil
		}
		// Parse matchers.
		var matchers []Matcher
		if err := parseJSON(matchersJSON, &matchers); err != nil {
			return mcp.ErrorContent(fmt.Errorf("invalid matchers JSON: %w", err)), nil
		}
		// ROBUSTNESS: reject an empty matcher list up front rather than
		// sending a silence with no matchers to Alertmanager.
		if len(matchers) == 0 {
			return mcp.ErrorContent(fmt.Errorf("matchers must contain at least one matcher")), nil
		}
		// Parse duration.
		duration, err := time.ParseDuration(durationStr)
		if err != nil {
			return mcp.ErrorContent(fmt.Errorf("invalid duration: %w", err)), nil
		}
		// ROBUSTNESS: a zero or negative duration would create a silence
		// that is already expired when it reaches Alertmanager.
		if duration <= 0 {
			return mcp.ErrorContent(fmt.Errorf("duration must be positive, got %q", durationStr)), nil
		}
		now := time.Now()
		silence := Silence{
			Matchers:  matchers,
			StartsAt:  now,
			EndsAt:    now.Add(duration),
			CreatedBy: author,
			Comment:   comment,
		}
		id, err := am.CreateSilence(ctx, silence)
		if err != nil {
			return mcp.ErrorContent(fmt.Errorf("failed to create silence: %w", err)), nil
		}
		var sb strings.Builder
		sb.WriteString("Silence created successfully.\n\n")
		sb.WriteString(fmt.Sprintf("**ID:** %s\n", id))
		sb.WriteString(fmt.Sprintf("**Expires:** %s\n", silence.EndsAt.Format(time.RFC3339)))
		sb.WriteString(fmt.Sprintf("**Author:** %s\n", author))
		sb.WriteString(fmt.Sprintf("**Comment:** %s\n", comment))
		return mcp.CallToolResult{
			Content: []mcp.Content{mcp.TextContent(sb.String())},
		}, nil
	}
}
// parseJSON is a helper to unmarshal JSON from a string.
func parseJSON(s string, v interface{}) error {
return json.Unmarshal([]byte(s), v)
}

View File

@@ -0,0 +1,378 @@
package monitoring
import (
"context"
"encoding/json"
"io"
"log"
"net/http"
"net/http/httptest"
"strings"
"testing"
"git.t-juice.club/torjus/labmcp/internal/mcp"
)
// setupTestServer creates a test MCP server with monitoring handlers backed by test HTTP servers.
func setupTestServer(t *testing.T, promHandler, amHandler http.HandlerFunc) (*mcp.Server, func()) {
t.Helper()
promSrv := httptest.NewServer(promHandler)
amSrv := httptest.NewServer(amHandler)
logger := log.New(io.Discard, "", 0)
config := mcp.DefaultMonitoringConfig()
server := mcp.NewGenericServer(logger, config)
prom := NewPrometheusClient(promSrv.URL)
am := NewAlertmanagerClient(amSrv.URL)
RegisterHandlers(server, prom, am)
cleanup := func() {
promSrv.Close()
amSrv.Close()
}
return server, cleanup
}
// TestHandler_ListAlerts verifies that the list_alerts tool fetches alerts
// from the Alertmanager stub and renders the alertname and count in its
// markdown output.
func TestHandler_ListAlerts(t *testing.T) {
	server, cleanup := setupTestServer(t,
		nil, // Prometheus backend unused by this tool
		func(w http.ResponseWriter, r *http.Request) {
			w.Header().Set("Content-Type", "application/json")
			// Single alert in Alertmanager API v2 shape.
			_, _ = w.Write([]byte(`[
	{
	"annotations": {"summary": "Node is down"},
	"endsAt": "2024-01-01T01:00:00Z",
	"fingerprint": "fp1",
	"receivers": [{"name": "default"}],
	"startsAt": "2024-01-01T00:00:00Z",
	"status": {"inhibitedBy": [], "silencedBy": [], "state": "active"},
	"updatedAt": "2024-01-01T00:00:00Z",
	"generatorURL": "",
	"labels": {"alertname": "NodeDown", "severity": "critical"}
	}
	]`))
		},
	)
	defer cleanup()
	result := callTool(t, server, "list_alerts", map[string]interface{}{})
	if result.IsError {
		t.Fatalf("unexpected error: %s", result.Content[0].Text)
	}
	if !strings.Contains(result.Content[0].Text, "NodeDown") {
		t.Errorf("expected output to contain 'NodeDown', got: %s", result.Content[0].Text)
	}
	if !strings.Contains(result.Content[0].Text, "1 alert") {
		t.Errorf("expected output to contain '1 alert', got: %s", result.Content[0].Text)
	}
}
// TestHandler_GetAlert verifies that the get_alert tool selects the alert
// whose fingerprint matches out of a multi-alert response.
func TestHandler_GetAlert(t *testing.T) {
	server, cleanup := setupTestServer(t,
		nil, // Prometheus backend unused by this tool
		func(w http.ResponseWriter, r *http.Request) {
			w.Header().Set("Content-Type", "application/json")
			// Two alerts; only "target-fp" should be returned.
			_, _ = w.Write([]byte(`[
	{
	"annotations": {"summary": "Found it"},
	"endsAt": "2024-01-01T01:00:00Z",
	"fingerprint": "target-fp",
	"receivers": [{"name": "default"}],
	"startsAt": "2024-01-01T00:00:00Z",
	"status": {"inhibitedBy": [], "silencedBy": [], "state": "active"},
	"updatedAt": "2024-01-01T00:00:00Z",
	"generatorURL": "",
	"labels": {"alertname": "TestAlert", "severity": "warning"}
	},
	{
	"annotations": {},
	"endsAt": "2024-01-01T01:00:00Z",
	"fingerprint": "other-fp",
	"receivers": [{"name": "default"}],
	"startsAt": "2024-01-01T00:00:00Z",
	"status": {"inhibitedBy": [], "silencedBy": [], "state": "active"},
	"updatedAt": "2024-01-01T00:00:00Z",
	"generatorURL": "",
	"labels": {"alertname": "OtherAlert", "severity": "info"}
	}
	]`))
		},
	)
	defer cleanup()
	result := callTool(t, server, "get_alert", map[string]interface{}{
		"fingerprint": "target-fp",
	})
	if result.IsError {
		t.Fatalf("unexpected error: %s", result.Content[0].Text)
	}
	if !strings.Contains(result.Content[0].Text, "TestAlert") {
		t.Errorf("expected output to contain 'TestAlert', got: %s", result.Content[0].Text)
	}
}
// TestHandler_GetAlertNotFound checks that asking for an unknown
// fingerprint produces a tool-level error when Alertmanager has no alerts.
func TestHandler_GetAlertNotFound(t *testing.T) {
	srv, done := setupTestServer(t,
		nil,
		func(w http.ResponseWriter, r *http.Request) {
			w.Header().Set("Content-Type", "application/json")
			_, _ = w.Write([]byte(`[]`))
		},
	)
	t.Cleanup(done)

	res := callTool(t, srv, "get_alert", map[string]interface{}{
		"fingerprint": "nonexistent",
	})
	if !res.IsError {
		t.Error("expected error result for nonexistent fingerprint")
	}
}
// TestHandler_Query checks that the query tool forwards PromQL to the
// Prometheus /api/v1/query endpoint and renders the vector result.
func TestHandler_Query(t *testing.T) {
	srv, done := setupTestServer(t,
		func(w http.ResponseWriter, r *http.Request) {
			// Only the instant-query endpoint should be hit by this tool.
			if r.URL.Path != "/api/v1/query" {
				http.NotFound(w, r)
				return
			}
			w.Header().Set("Content-Type", "application/json")
			_, _ = w.Write([]byte(`{
	"status": "success",
	"data": {
		"resultType": "vector",
		"result": [
			{
				"metric": {"__name__": "up", "job": "node"},
				"value": [1234567890, "1"]
			}
		]
	}
}`))
		},
		nil,
	)
	t.Cleanup(done)

	res := callTool(t, srv, "query", map[string]interface{}{
		"promql": "up",
	})
	if res.IsError {
		t.Fatalf("unexpected error: %s", res.Content[0].Text)
	}
	if !strings.Contains(res.Content[0].Text, "node") {
		t.Errorf("expected output to contain 'node', got: %s", res.Content[0].Text)
	}
}
// TestHandler_ListTargets checks that list_targets reads the Prometheus
// /api/v1/targets endpoint and includes the scrape job in its output.
func TestHandler_ListTargets(t *testing.T) {
	srv, done := setupTestServer(t,
		func(w http.ResponseWriter, r *http.Request) {
			if r.URL.Path != "/api/v1/targets" {
				http.NotFound(w, r)
				return
			}
			w.Header().Set("Content-Type", "application/json")
			_, _ = w.Write([]byte(`{
	"status": "success",
	"data": {
		"activeTargets": [
			{
				"labels": {"instance": "localhost:9090", "job": "prometheus"},
				"scrapePool": "prometheus",
				"scrapeUrl": "http://localhost:9090/metrics",
				"globalUrl": "http://localhost:9090/metrics",
				"lastError": "",
				"lastScrape": "2024-01-01T00:00:00Z",
				"lastScrapeDuration": 0.015,
				"health": "up",
				"scrapeInterval": "15s",
				"scrapeTimeout": "10s"
			}
		],
		"droppedTargets": []
	}
}`))
		},
		nil,
	)
	t.Cleanup(done)

	res := callTool(t, srv, "list_targets", map[string]interface{}{})
	if res.IsError {
		t.Fatalf("unexpected error: %s", res.Content[0].Text)
	}
	if !strings.Contains(res.Content[0].Text, "prometheus") {
		t.Errorf("expected output to contain 'prometheus', got: %s", res.Content[0].Text)
	}
}
// TestHandler_SearchMetrics checks that search_metrics combines the metric
// name list with metadata and filters names by the query substring.
func TestHandler_SearchMetrics(t *testing.T) {
	srv, done := setupTestServer(t,
		func(w http.ResponseWriter, r *http.Request) {
			w.Header().Set("Content-Type", "application/json")
			switch r.URL.Path {
			case "/api/v1/label/__name__/values":
				_, _ = w.Write([]byte(`{
	"status": "success",
	"data": ["node_cpu_seconds_total", "node_memory_MemTotal_bytes", "up"]
}`))
			case "/api/v1/metadata":
				_, _ = w.Write([]byte(`{
	"status": "success",
	"data": {
		"node_cpu_seconds_total": [{"type": "counter", "help": "CPU time", "unit": ""}],
		"node_memory_MemTotal_bytes": [{"type": "gauge", "help": "Total memory", "unit": "bytes"}]
	}
}`))
			default:
				http.NotFound(w, r)
			}
		},
		nil,
	)
	t.Cleanup(done)

	res := callTool(t, srv, "search_metrics", map[string]interface{}{
		"query": "node",
	})
	if res.IsError {
		t.Fatalf("unexpected error: %s", res.Content[0].Text)
	}
	if !strings.Contains(res.Content[0].Text, "node_cpu") {
		t.Errorf("expected output to contain 'node_cpu', got: %s", res.Content[0].Text)
	}
	// "up" does not match the "node" query, so it must not appear as a row.
	if strings.Contains(res.Content[0].Text, "| up |") {
		t.Errorf("expected 'up' to be filtered out, got: %s", res.Content[0].Text)
	}
}
// TestHandler_ListSilences checks that list_silences shows active silences
// and hides expired ones returned by the Alertmanager /api/v2/silences stub.
func TestHandler_ListSilences(t *testing.T) {
	srv, done := setupTestServer(t,
		nil,
		func(w http.ResponseWriter, r *http.Request) {
			if r.URL.Path != "/api/v2/silences" {
				http.NotFound(w, r)
				return
			}
			w.Header().Set("Content-Type", "application/json")
			_, _ = w.Write([]byte(`[
	{
		"id": "s1",
		"matchers": [{"name": "alertname", "value": "Test", "isRegex": false}],
		"startsAt": "2024-01-01T00:00:00Z",
		"endsAt": "2024-01-01T02:00:00Z",
		"createdBy": "admin",
		"comment": "Testing",
		"status": {"state": "active"}
	},
	{
		"id": "s2",
		"matchers": [{"name": "job", "value": "node", "isRegex": false}],
		"startsAt": "2023-01-01T00:00:00Z",
		"endsAt": "2023-01-01T02:00:00Z",
		"createdBy": "admin",
		"comment": "Old",
		"status": {"state": "expired"}
	}
]`))
		},
	)
	t.Cleanup(done)

	res := callTool(t, srv, "list_silences", map[string]interface{}{})
	if res.IsError {
		t.Fatalf("unexpected error: %s", res.Content[0].Text)
	}
	// The active silence must be listed; the expired one must be dropped.
	if !strings.Contains(res.Content[0].Text, "s1") {
		t.Errorf("expected active silence s1 in output, got: %s", res.Content[0].Text)
	}
	if strings.Contains(res.Content[0].Text, "s2") {
		t.Errorf("expected expired silence s2 to be filtered out, got: %s", res.Content[0].Text)
	}
}
// TestHandler_ToolCount checks that tools/list reports exactly the eight
// tools this server registers, logging the names on mismatch for debugging.
func TestHandler_ToolCount(t *testing.T) {
	srv, done := setupTestServer(t,
		func(w http.ResponseWriter, r *http.Request) {},
		func(w http.ResponseWriter, r *http.Request) {},
	)
	t.Cleanup(done)

	// Issue a bare tools/list request through the JSON-RPC layer.
	listReq := &mcp.Request{
		JSONRPC: "2.0",
		ID:      1,
		Method:  "tools/list",
	}
	resp := srv.HandleRequest(context.Background(), listReq)
	if resp == nil {
		t.Fatal("expected response, got nil")
	}
	if resp.Error != nil {
		t.Fatalf("unexpected error: %s", resp.Error.Message)
	}

	// Round-trip the result through JSON to decode it into the typed struct.
	raw, err := json.Marshal(resp.Result)
	if err != nil {
		t.Fatalf("failed to marshal result: %v", err)
	}
	var listResult mcp.ListToolsResult
	if err := json.Unmarshal(raw, &listResult); err != nil {
		t.Fatalf("failed to unmarshal result: %v", err)
	}
	if len(listResult.Tools) != 8 {
		t.Errorf("expected 8 tools, got %d", len(listResult.Tools))
		for _, tool := range listResult.Tools {
			t.Logf("  tool: %s", tool.Name)
		}
	}
}
// callTool is a test helper that calls a tool through the MCP server.
// It marshals the arguments into a tools/call request, dispatches it, and
// decodes the JSON-RPC result into a CallToolResult, failing the test on
// any transport- or protocol-level error.
func callTool(t *testing.T, server *mcp.Server, name string, args map[string]interface{}) mcp.CallToolResult {
	t.Helper()

	payload, err := json.Marshal(mcp.CallToolParams{
		Name:      name,
		Arguments: args,
	})
	if err != nil {
		t.Fatalf("failed to marshal params: %v", err)
	}

	resp := server.HandleRequest(context.Background(), &mcp.Request{
		JSONRPC: "2.0",
		ID:      1,
		Method:  "tools/call",
		Params:  payload,
	})
	if resp == nil {
		t.Fatal("expected response, got nil")
	}
	if resp.Error != nil {
		t.Fatalf("JSON-RPC error: %s", resp.Error.Message)
	}

	// The result arrives as an untyped value; re-encode to decode it into
	// the concrete CallToolResult type.
	raw, err := json.Marshal(resp.Result)
	if err != nil {
		t.Fatalf("failed to marshal result: %v", err)
	}
	var out mcp.CallToolResult
	if err := json.Unmarshal(raw, &out); err != nil {
		t.Fatalf("failed to unmarshal result: %v", err)
	}
	return out
}

View File

@@ -0,0 +1,135 @@
package monitoring
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
)
// PrometheusClient is an HTTP client for the Prometheus API.
type PrometheusClient struct {
baseURL string
httpClient *http.Client
}
// NewPrometheusClient creates a new Prometheus API client.
func NewPrometheusClient(baseURL string) *PrometheusClient {
return &PrometheusClient{
baseURL: strings.TrimRight(baseURL, "/"),
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
}
}
// Query executes an instant PromQL query. If ts is zero, the current time is used.
// The PromQL expression is sent as the "query" parameter; a non-zero ts is
// sent as a Unix-seconds "time" parameter.
func (c *PrometheusClient) Query(ctx context.Context, promql string, ts time.Time) (*PromQueryData, error) {
	q := url.Values{}
	q.Set("query", promql)
	if !ts.IsZero() {
		q.Set("time", fmt.Sprintf("%d", ts.Unix()))
	}

	raw, err := c.get(ctx, "/api/v1/query", q)
	if err != nil {
		return nil, fmt.Errorf("query failed: %w", err)
	}

	data := &PromQueryData{}
	if err := json.Unmarshal(raw, data); err != nil {
		return nil, fmt.Errorf("failed to parse query data: %w", err)
	}
	return data, nil
}
// LabelValues returns all values for a given label name.
// The label is path-escaped before being interpolated into the endpoint.
func (c *PrometheusClient) LabelValues(ctx context.Context, label string) ([]string, error) {
	endpoint := "/api/v1/label/" + url.PathEscape(label) + "/values"

	raw, err := c.get(ctx, endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("label values failed: %w", err)
	}

	var vals []string
	if err := json.Unmarshal(raw, &vals); err != nil {
		return nil, fmt.Errorf("failed to parse label values: %w", err)
	}
	return vals, nil
}
// Metadata returns metadata for metrics. If metric is empty, returns metadata for all metrics.
// The result maps metric name to its (possibly multiple) metadata entries.
func (c *PrometheusClient) Metadata(ctx context.Context, metric string) (map[string][]PromMetadata, error) {
	q := url.Values{}
	if metric != "" {
		q.Set("metric", metric)
	}

	raw, err := c.get(ctx, "/api/v1/metadata", q)
	if err != nil {
		return nil, fmt.Errorf("metadata failed: %w", err)
	}

	var md map[string][]PromMetadata
	if err := json.Unmarshal(raw, &md); err != nil {
		return nil, fmt.Errorf("failed to parse metadata: %w", err)
	}
	return md, nil
}
// Targets returns the current scrape targets, both active and dropped,
// as reported by the /api/v1/targets endpoint.
func (c *PrometheusClient) Targets(ctx context.Context) (*PromTargetsData, error) {
	raw, err := c.get(ctx, "/api/v1/targets", nil)
	if err != nil {
		return nil, fmt.Errorf("targets failed: %w", err)
	}

	data := &PromTargetsData{}
	if err := json.Unmarshal(raw, data); err != nil {
		return nil, fmt.Errorf("failed to parse targets data: %w", err)
	}
	return data, nil
}
// get performs a GET request and returns the "data" field from the Prometheus response envelope.
// It fails on non-200 status codes and on envelopes whose status is not
// "success", surfacing the Prometheus errorType/error in the latter case.
func (c *PrometheusClient) get(ctx context.Context, path string, params url.Values) (json.RawMessage, error) {
	endpoint := c.baseURL + path
	if len(params) > 0 {
		endpoint += "?" + params.Encode()
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck // cleanup on exit

	// Read the body before checking the status so error responses can be
	// included in the returned error message.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
	}

	var envelope PromResponse
	if err := json.Unmarshal(body, &envelope); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	if envelope.Status != "success" {
		return nil, fmt.Errorf("prometheus error (%s): %s", envelope.ErrorType, envelope.Error)
	}
	return envelope.Data, nil
}

View File

@@ -0,0 +1,209 @@
package monitoring
import (
	"context"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"
	"time"
)
// TestPrometheusClient_Query checks that Query hits /api/v1/query with the
// PromQL expression and decodes a two-element vector result.
func TestPrometheusClient_Query(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/v1/query" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		if got := r.URL.Query().Get("query"); got != "up" {
			t.Errorf("unexpected query param: %s", got)
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
	"status": "success",
	"data": {
		"resultType": "vector",
		"result": [
			{
				"metric": {"__name__": "up", "job": "prometheus", "instance": "localhost:9090"},
				"value": [1234567890, "1"]
			},
			{
				"metric": {"__name__": "up", "job": "node", "instance": "localhost:9100"},
				"value": [1234567890, "0"]
			}
		]
	}
}`))
	}))
	defer srv.Close()

	data, err := NewPrometheusClient(srv.URL).Query(context.Background(), "up", time.Time{})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if data.ResultType != "vector" {
		t.Errorf("expected resultType=vector, got %s", data.ResultType)
	}
	if len(data.Result) != 2 {
		t.Fatalf("expected 2 results, got %d", len(data.Result))
	}
	if data.Result[0].Metric["job"] != "prometheus" {
		t.Errorf("expected job=prometheus, got %s", data.Result[0].Metric["job"])
	}
}
// TestPrometheusClient_QueryError checks that a status:"error" envelope is
// surfaced to the caller with the Prometheus error text included.
func TestPrometheusClient_QueryError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
	"status": "error",
	"errorType": "bad_data",
	"error": "invalid expression"
}`))
	}))
	defer srv.Close()

	_, err := NewPrometheusClient(srv.URL).Query(context.Background(), "invalid{", time.Time{})
	if err == nil {
		t.Fatal("expected error, got nil")
	}
	if !contains(err.Error(), "invalid expression") {
		t.Errorf("expected error to contain 'invalid expression', got: %s", err.Error())
	}
}
// TestPrometheusClient_LabelValues checks that LabelValues builds the
// per-label endpoint path and decodes the string list in order.
func TestPrometheusClient_LabelValues(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/v1/label/__name__/values" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
	"status": "success",
	"data": ["up", "node_cpu_seconds_total", "prometheus_build_info"]
}`))
	}))
	defer srv.Close()

	values, err := NewPrometheusClient(srv.URL).LabelValues(context.Background(), "__name__")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(values) != 3 {
		t.Fatalf("expected 3 values, got %d", len(values))
	}
	if values[0] != "up" {
		t.Errorf("expected first value=up, got %s", values[0])
	}
}
// TestPrometheusClient_Metadata checks that Metadata decodes the
// name-to-entries map returned by /api/v1/metadata.
func TestPrometheusClient_Metadata(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/v1/metadata" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
	"status": "success",
	"data": {
		"up": [{"type": "gauge", "help": "Whether the target is up.", "unit": ""}],
		"node_cpu_seconds_total": [{"type": "counter", "help": "CPU seconds spent.", "unit": "seconds"}]
	}
}`))
	}))
	defer srv.Close()

	md, err := NewPrometheusClient(srv.URL).Metadata(context.Background(), "")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(md) != 2 {
		t.Fatalf("expected 2 metrics, got %d", len(md))
	}
	if md["up"][0].Type != "gauge" {
		t.Errorf("expected up type=gauge, got %s", md["up"][0].Type)
	}
}
// TestPrometheusClient_Targets checks that Targets decodes the active
// target list, including its health field.
func TestPrometheusClient_Targets(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/v1/targets" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
	"status": "success",
	"data": {
		"activeTargets": [
			{
				"labels": {"instance": "localhost:9090", "job": "prometheus"},
				"scrapePool": "prometheus",
				"scrapeUrl": "http://localhost:9090/metrics",
				"globalUrl": "http://localhost:9090/metrics",
				"lastError": "",
				"lastScrape": "2024-01-01T00:00:00Z",
				"lastScrapeDuration": 0.01,
				"health": "up",
				"scrapeInterval": "15s",
				"scrapeTimeout": "10s"
			}
		],
		"droppedTargets": []
	}
}`))
	}))
	defer srv.Close()

	data, err := NewPrometheusClient(srv.URL).Targets(context.Background())
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(data.ActiveTargets) != 1 {
		t.Fatalf("expected 1 active target, got %d", len(data.ActiveTargets))
	}
	if data.ActiveTargets[0].Health != "up" {
		t.Errorf("expected health=up, got %s", data.ActiveTargets[0].Health)
	}
}
// TestPrometheusClient_HTTPError checks that a non-200 response turns into
// an error that mentions the HTTP status code.
func TestPrometheusClient_HTTPError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte("internal error"))
	}))
	defer srv.Close()

	_, err := NewPrometheusClient(srv.URL).Query(context.Background(), "up", time.Time{})
	if err == nil {
		t.Fatal("expected error, got nil")
	}
	if !contains(err.Error(), "500") {
		t.Errorf("expected error to contain status code, got: %s", err.Error())
	}
}
// contains reports whether substr is within s. It delegates to the
// standard library instead of hand-rolling substring search.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}
// searchString reports whether substr occurs anywhere in s using a naive
// byte-by-byte scan. An empty substr always matches.
func searchString(s, substr string) bool {
	limit := len(s) - len(substr)
	for start := 0; start <= limit; start++ {
		matched := true
		for j := 0; j < len(substr); j++ {
			if s[start+j] != substr[j] {
				matched = false
				break
			}
		}
		if matched {
			return true
		}
	}
	return false
}

View File

@@ -0,0 +1,120 @@
package monitoring
import (
"encoding/json"
"time"
)
// Prometheus API response types.

// PromResponse is the standard Prometheus API response envelope.
// Status is "success" on success; otherwise ErrorType and Error describe
// the failure and Data may be absent. Data is kept raw so each endpoint
// can decode its own payload shape.
type PromResponse struct {
	Status    string          `json:"status"`
	Data      json.RawMessage `json:"data,omitempty"`
	ErrorType string          `json:"errorType,omitempty"`
	Error     string          `json:"error,omitempty"`
}

// PromQueryData represents the data field for query results.
type PromQueryData struct {
	ResultType string              `json:"resultType"` // e.g. "vector"
	Result     []PromInstantVector `json:"result"`
}

// PromInstantVector represents a single instant vector result:
// a label set plus one sampled value.
type PromInstantVector struct {
	Metric map[string]string `json:"metric"`
	Value  [2]interface{}    `json:"value"` // [timestamp, value_string]
}

// PromScalar represents a scalar query result.
type PromScalar [2]interface{} // [timestamp, value_string]

// PromMetadata represents metadata for a single metric.
type PromMetadata struct {
	Type string `json:"type"` // e.g. "counter", "gauge"
	Help string `json:"help"`
	Unit string `json:"unit"` // may be empty
}

// PromTarget represents a single scrape target as reported by
// the /api/v1/targets endpoint.
type PromTarget struct {
	DiscoveredLabels   map[string]string `json:"discoveredLabels"`
	Labels             map[string]string `json:"labels"`
	ScrapePool         string            `json:"scrapePool"`
	ScrapeURL          string            `json:"scrapeUrl"`
	GlobalURL          string            `json:"globalUrl"`
	LastError          string            `json:"lastError"` // empty when the last scrape succeeded
	LastScrape         time.Time         `json:"lastScrape"`
	LastScrapeDuration float64           `json:"lastScrapeDuration"` // seconds
	Health             string            `json:"health"`             // e.g. "up"
	ScrapeInterval     string            `json:"scrapeInterval"`     // duration string, e.g. "15s"
	ScrapeTimeout      string            `json:"scrapeTimeout"`      // duration string, e.g. "10s"
}

// PromTargetsData represents the data field for targets results.
type PromTargetsData struct {
	ActiveTargets  []PromTarget `json:"activeTargets"`
	DroppedTargets []PromTarget `json:"droppedTargets"`
}
// Alertmanager API response types.

// Alert represents an alert from the Alertmanager API v2.
type Alert struct {
	Annotations  map[string]string `json:"annotations"`
	EndsAt       time.Time         `json:"endsAt"`
	Fingerprint  string            `json:"fingerprint"` // unique identifier used by get_alert lookups
	Receivers    []AlertReceiver   `json:"receivers"`
	StartsAt     time.Time         `json:"startsAt"`
	Status       AlertStatus       `json:"status"`
	UpdatedAt    time.Time         `json:"updatedAt"`
	GeneratorURL string            `json:"generatorURL"` // may be empty
	Labels       map[string]string `json:"labels"`
}

// AlertReceiver represents an alert receiver.
type AlertReceiver struct {
	Name string `json:"name"`
}

// AlertStatus represents the status of an alert.
type AlertStatus struct {
	InhibitedBy []string `json:"inhibitedBy"` // IDs of alerts inhibiting this one
	SilencedBy  []string `json:"silencedBy"`  // IDs of silences muting this one
	State       string   `json:"state"`       // "active", "suppressed", "unprocessed"
}

// AlertFilters contains filters for listing alerts. The *bool fields are
// tri-state: nil means "do not filter on this attribute"; otherwise the
// value is forwarded as a boolean query parameter.
type AlertFilters struct {
	Active      *bool
	Silenced    *bool
	Inhibited   *bool
	Unprocessed *bool
	Filter      []string // PromQL-style label matchers, e.g. {severity="critical"}
	Receiver    string   // restrict to alerts routed to this receiver; empty means all
}

// Silence represents a silence from the Alertmanager API v2.
type Silence struct {
	ID        string         `json:"id,omitempty"` // assigned by Alertmanager; omitted when creating
	Matchers  []Matcher      `json:"matchers"`
	StartsAt  time.Time      `json:"startsAt"`
	EndsAt    time.Time      `json:"endsAt"`
	CreatedBy string         `json:"createdBy"`
	Comment   string         `json:"comment"`
	Status    *SilenceStatus `json:"status,omitempty"` // set in API responses
}

// SilenceStatus represents the status of a silence.
type SilenceStatus struct {
	State string `json:"state"` // "active", "pending", "expired"
}

// Matcher represents a label matcher for silences.
type Matcher struct {
	Name    string `json:"name"`
	Value   string `json:"value"`
	IsRegex bool   `json:"isRegex"`
	// IsEqual is omitted when nil; presumably Alertmanager then defaults
	// to equality matching — TODO confirm against the API v2 spec.
	IsEqual *bool `json:"isEqual,omitempty"`
}