feat: add nixpkgs-search binary with package search support

Add a new nixpkgs-search CLI that combines NixOS options search with
Nix package search functionality. This provides two MCP servers from
a single binary:
- `nixpkgs-search options serve` for NixOS options
- `nixpkgs-search packages serve` for Nix packages

Key changes:
- Add packages table to database schema (version 3)
- Add Package type and search methods to database layer
- Create internal/packages/ with indexer and parser for nix-env JSON
- Add MCP server mode (options/packages) with separate tool sets
- Add package handlers: search_packages, get_package
- Create cmd/nixpkgs-search with combined indexing support
- Update flake.nix with nixpkgs-search package (now default)
- Bump version to 0.2.0

The index command can index both options and packages together, or
use --no-packages/--no-options flags for partial indexing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-04 17:12:41 +01:00
parent 9efcca217c
commit ea4c69bc23
17 changed files with 2559 additions and 63 deletions

View File

@@ -0,0 +1,257 @@
package packages
import (
"context"
"encoding/json"
"fmt"
"net/http"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"
"git.t-juice.club/torjus/labmcp/internal/database"
)
// revisionPattern is the allow-list that ValidateRevision applies to revision
// strings to prevent injection attacks.
// It accepts 1-64 characters drawn from ASCII letters, digits, dots,
// underscores and hyphens, which covers channel names like "nixos-24.11" and
// git hashes. Anything else (slashes, spaces, quotes, shell metacharacters)
// does not match.
var revisionPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]{1,64}$`)
// Indexer handles indexing of packages from nixpkgs revisions.
// Construct it with NewIndexer so that both fields are populated.
type Indexer struct {
// store receives package batches and the final per-revision package count.
store database.Store
// httpClient is used for GitHub API requests (see GetCommitDate).
httpClient *http.Client
}
// NewIndexer returns an Indexer that persists packages through store.
// Its HTTP client is configured with a 10-minute timeout.
func NewIndexer(store database.Store) *Indexer {
	client := &http.Client{
		Timeout: 10 * time.Minute, // Longer timeout for package evaluation
	}

	idx := new(Indexer)
	idx.store = store
	idx.httpClient = client
	return idx
}
// ValidateRevision reports whether revision is safe to use.
// It returns nil when revision matches revisionPattern and a descriptive
// error otherwise (including for the empty string).
func ValidateRevision(revision string) error {
	if revisionPattern.MatchString(revision) {
		return nil
	}
	return fmt.Errorf("invalid revision format: must be 1-64 alphanumeric characters, hyphens, underscores, or dots")
}
// IndexPackages evaluates the package set at ref and stores it under the
// revision identified by revisionID.
//
// The revision row is expected to exist already (per the original contract it
// is created by the options indexer). ref is validated with ValidateRevision
// before any external command runs. Packages are streamed from the generated
// JSON file and written to the store in batches; once everything is stored,
// the revision's package count is updated.
//
// It returns the revision ID, the number of packages processed and the
// elapsed time, or the first error encountered.
func (idx *Indexer) IndexPackages(ctx context.Context, revisionID int64, ref string) (*IndexResult, error) {
	// batchSize is the number of packages accumulated before each store write.
	const batchSize = 1000

	begin := time.Now()

	// Validate revision to prevent injection attacks.
	if err := ValidateRevision(ref); err != nil {
		return nil, err
	}

	// Produce packages.json via nix-env; removeTmp deletes the temp directory.
	jsonPath, removeTmp, err := idx.buildPackages(ctx, ref)
	if err != nil {
		return nil, fmt.Errorf("failed to build packages: %w", err)
	}
	defer removeTmp()

	src, err := os.Open(jsonPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open packages.json: %w", err)
	}
	defer src.Close() //nolint:errcheck // read-only file

	var (
		pending = make([]*database.Package, 0, batchSize)
		total   int
	)

	// onPackage converts one parsed package into its database form and
	// flushes the pending batch whenever it reaches batchSize.
	onPackage := func(p *ParsedPackage) error {
		pending = append(pending, &database.Package{
			RevisionID:      revisionID,
			AttrPath:        p.AttrPath,
			Pname:           p.Pname,
			Version:         p.Version,
			Description:     p.Description,
			LongDescription: p.LongDescription,
			Homepage:        p.Homepage,
			License:         p.License,
			Platforms:       p.Platforms,
			Maintainers:     p.Maintainers,
			Broken:          p.Broken,
			Unfree:          p.Unfree,
			Insecure:        p.Insecure,
		})
		total++

		if len(pending) < batchSize {
			return nil
		}
		if err := idx.store.CreatePackagesBatch(ctx, pending); err != nil {
			return fmt.Errorf("failed to store packages batch: %w", err)
		}
		// Reuse the backing array for the next batch.
		pending = pending[:0]
		return nil
	}

	if _, err := ParsePackagesStream(src, onPackage); err != nil {
		return nil, fmt.Errorf("failed to parse packages: %w", err)
	}

	// Flush whatever is left over after the last full batch.
	if len(pending) != 0 {
		if err := idx.store.CreatePackagesBatch(ctx, pending); err != nil {
			return nil, fmt.Errorf("failed to store final packages batch: %w", err)
		}
	}

	if err := idx.store.UpdateRevisionPackageCount(ctx, revisionID, total); err != nil {
		return nil, fmt.Errorf("failed to update package count: %w", err)
	}

	result := &IndexResult{
		RevisionID:   revisionID,
		PackageCount: total,
		Duration:     time.Since(begin),
	}
	return result, nil
}
// buildPackages builds a JSON file containing all packages for a nixpkgs
// revision.
//
// It fetches the nixpkgs tarball for ref with nix-instantiate, then runs
// nix-env against the fetched tree and writes its JSON output (with metadata)
// to packages.json inside a fresh temp directory.
//
// On success it returns the path to packages.json and a cleanup function that
// removes the temp directory. On failure the temp directory has already been
// removed and the returned cleanup function is nil, so callers must only call
// or defer it after checking the error.
//
// ref is interpolated into a Nix expression; IndexPackages validates it with
// ValidateRevision before calling this method.
func (idx *Indexer) buildPackages(ctx context.Context, ref string) (string, func(), error) {
	// Create temp directory
	tmpDir, err := os.MkdirTemp("", "nixpkgs-packages-*")
	if err != nil {
		return "", nil, fmt.Errorf("failed to create temp dir: %w", err)
	}

	cleanup := func() {
		_ = os.RemoveAll(tmpDir) //nolint:errcheck // best-effort temp dir cleanup
	}

	outputPath := filepath.Join(tmpDir, "packages.json")

	// First, fetch the nixpkgs tarball to the nix store.
	// This ensures it's available for nix-env evaluation.
	nixExpr := fmt.Sprintf(`
builtins.fetchTarball {
url = "https://github.com/NixOS/nixpkgs/archive/%s.tar.gz";
}
`, ref)

	fetchCmd := exec.CommandContext(ctx, "nix-instantiate", "--eval", "-E", nixExpr)
	fetchCmd.Dir = tmpDir
	// Output captures stderr into ExitError.Stderr because fetchCmd.Stderr is unset.
	fetchOutput, err := fetchCmd.Output()
	if err != nil {
		cleanup()
		if exitErr, ok := err.(*exec.ExitError); ok {
			return "", nil, fmt.Errorf("nix-instantiate fetch failed: %s", string(exitErr.Stderr))
		}
		return "", nil, fmt.Errorf("nix-instantiate fetch failed: %w", err)
	}

	// The output is the store path in quotes, e.g., "/nix/store/xxx-source"
	nixpkgsPath := strings.Trim(strings.TrimSpace(string(fetchOutput)), "\"")
	if nixpkgsPath == "" {
		cleanup()
		return "", nil, fmt.Errorf("nix-instantiate fetch failed: empty store path")
	}

	// Create output file
	outputFile, err := os.Create(outputPath)
	if err != nil {
		cleanup()
		return "", nil, fmt.Errorf("failed to create output file: %w", err)
	}

	// Run nix-env to get all packages as JSON.
	// Use --json --meta to get full metadata.
	cmd := exec.CommandContext(ctx, "nix-env",
		"-f", nixpkgsPath,
		"-qaP", "--json", "--meta",
	)
	cmd.Dir = tmpDir
	cmd.Stdout = outputFile

	// Keep warnings about unfree/broken packages off the terminal, but buffer
	// them: Run (unlike Output) never fills ExitError.Stderr, so without this
	// buffer a failure would be reported with an empty message.
	var stderr strings.Builder
	cmd.Stderr = &stderr

	runErr := cmd.Run()
	closeErr := outputFile.Close()

	if runErr != nil {
		cleanup()
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return "", nil, fmt.Errorf("nix-env failed: %w: %s", runErr, msg)
		}
		return "", nil, fmt.Errorf("nix-env failed: %w", runErr)
	}
	// A failed close can mean the JSON was not fully written.
	if closeErr != nil {
		cleanup()
		return "", nil, fmt.Errorf("failed to close output file: %w", closeErr)
	}

	// Verify output file exists and has content
	stat, err := os.Stat(outputPath)
	if err != nil || stat.Size() == 0 {
		cleanup()
		return "", nil, fmt.Errorf("packages.json not found or empty")
	}

	return outputPath, cleanup, nil
}
// ResolveRevision maps a channel alias to its git ref via ChannelAliases.
// Input that is not a known alias is returned unchanged.
func (idx *Indexer) ResolveRevision(revision string) string {
	target, known := ChannelAliases[revision]
	if !known {
		return revision
	}
	return target
}
// GetChannelName returns the channel name associated with revision.
// If revision is itself a key of ChannelAliases it is returned as-is;
// otherwise the first alias found whose target equals revision is returned.
// An empty string means no channel matches.
func (idx *Indexer) GetChannelName(revision string) string {
	_, isAlias := ChannelAliases[revision]
	if isAlias {
		return revision
	}

	// Reverse lookup: is the revision the target of some alias?
	for alias, target := range ChannelAliases {
		if target != revision {
			continue
		}
		return alias
	}

	return ""
}
// GetCommitDate gets the commit date for a git ref using the GitHub API.
//
// ref is validated with ValidateRevision before it is placed in the request
// URL, so refs containing path or query characters are rejected instead of
// silently changing which API endpoint is called. This matches the validation
// IndexPackages applies to the same value.
//
// It returns the committer date of the commit ref points to. A non-200 API
// response, a transport failure or an undecodable body is reported as an error.
func (idx *Indexer) GetCommitDate(ctx context.Context, ref string) (time.Time, error) {
	if err := ValidateRevision(ref); err != nil {
		return time.Time{}, err
	}

	endpoint := "https://api.github.com/repos/NixOS/nixpkgs/commits/" + ref

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return time.Time{}, err
	}
	req.Header.Set("Accept", "application/vnd.github.v3+json")

	resp, err := idx.httpClient.Do(req)
	if err != nil {
		return time.Time{}, err
	}
	defer resp.Body.Close() //nolint:errcheck // response body read-only

	if resp.StatusCode != http.StatusOK {
		return time.Time{}, fmt.Errorf("GitHub API returned %d", resp.StatusCode)
	}

	// Only the committer date is needed from the response payload.
	var commit struct {
		Commit struct {
			Committer struct {
				Date time.Time `json:"date"`
			} `json:"committer"`
		} `json:"commit"`
	}

	if err := json.NewDecoder(resp.Body).Decode(&commit); err != nil {
		return time.Time{}, err
	}

	return commit.Commit.Committer.Date, nil
}

View File

@@ -0,0 +1,82 @@
package packages
import (
"testing"
)
// TestValidateRevision checks that ValidateRevision accepts well-formed
// revisions and rejects empty, over-long and unsafe ones.
func TestValidateRevision(t *testing.T) {
	// Build the length-boundary inputs from valid characters only, so these
	// cases exercise the 1-64 length rule rather than the character allow-list.
	filler := make([]byte, 65)
	for i := range filler {
		filler[i] = 'a'
	}
	maxLength := string(filler[:64])
	tooLong := string(filler)

	tests := []struct {
		name      string
		revision  string
		expectErr bool
	}{
		{"valid hash", "abc123def456", false},
		{"valid channel", "nixos-unstable", false},
		{"valid version channel", "nixos-24.11", false},
		{"max length", maxLength, false},
		{"empty", "", true},
		{"too long", tooLong, true},
		{"nul bytes", "a" + string(make([]byte, 100)), true},
		{"shell injection", "$(rm -rf /)", true},
		{"path traversal", "../../../etc/passwd", true},
		{"semicolon", "abc;rm -rf /", true},
		{"backtick", "`whoami`", true},
		{"space", "abc def", true},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			err := ValidateRevision(tc.revision)
			if tc.expectErr && err == nil {
				t.Error("Expected error, got nil")
			}
			if !tc.expectErr && err != nil {
				t.Errorf("Expected no error, got %v", err)
			}
		})
	}
}
// TestResolveRevision verifies alias resolution and pass-through of
// revisions that are not aliases.
func TestResolveRevision(t *testing.T) {
	indexer := new(Indexer)

	cases := []struct {
		input    string
		expected string
	}{
		{input: "nixos-unstable", expected: "nixos-unstable"},
		{input: "nixos-stable", expected: "nixos-24.11"},
		{input: "nixos-24.11", expected: "nixos-24.11"},
		{input: "abc123", expected: "abc123"},
	}

	for _, c := range cases {
		t.Run(c.input, func(t *testing.T) {
			if got := indexer.ResolveRevision(c.input); got != c.expected {
				t.Errorf("Expected %q, got %q", c.expected, got)
			}
		})
	}
}
// TestGetChannelName verifies that known channel names are echoed back and
// that an arbitrary revision yields the empty string.
func TestGetChannelName(t *testing.T) {
	indexer := new(Indexer)

	cases := []struct {
		input    string
		expected string
	}{
		{input: "nixos-unstable", expected: "nixos-unstable"},
		{input: "nixos-stable", expected: "nixos-stable"},
		{input: "nixos-24.11", expected: "nixos-24.11"},
		{input: "abc123", expected: ""},
	}

	for _, c := range cases {
		t.Run(c.input, func(t *testing.T) {
			if got := indexer.GetChannelName(c.input); got != c.expected {
				t.Errorf("Expected %q, got %q", c.expected, got)
			}
		})
	}
}

199
internal/packages/parser.go Normal file
View File

@@ -0,0 +1,199 @@
package packages
import (
"encoding/json"
"fmt"
"io"
"strings"
)
// ParsePackages decodes a complete nix-env JSON document from r and returns
// the packages keyed by attribute path. The whole document is held in memory;
// ParsePackagesStream is the streaming alternative.
func ParsePackages(r io.Reader) (map[string]*ParsedPackage, error) {
	var file PackagesFile
	decoder := json.NewDecoder(r)
	if err := decoder.Decode(&file); err != nil {
		return nil, fmt.Errorf("failed to decode packages JSON: %w", err)
	}

	result := make(map[string]*ParsedPackage, len(file))
	for attr, raw := range file {
		meta := raw.Meta
		// Loosely typed metadata fields are normalized to plain strings or
		// JSON-array strings.
		result[attr] = &ParsedPackage{
			AttrPath:        attr,
			Pname:           raw.Pname,
			Version:         raw.Version,
			Description:     meta.Description,
			LongDescription: meta.LongDescription,
			Homepage:        normalizeHomepage(meta.Homepage),
			License:         normalizeLicense(meta.License),
			Platforms:       normalizePlatforms(meta.Platforms),
			Maintainers:     normalizeMaintainers(meta.Maintainers),
			Broken:          meta.Broken,
			Unfree:          meta.Unfree,
			Insecure:        meta.Insecure,
		}
	}

	return result, nil
}
// normalizeHomepage reduces a homepage value to a single string.
// A string is returned as-is; for a list, the first element is returned if it
// is a string. Every other input (nil, empty list, other types) yields "".
func normalizeHomepage(v interface{}) string {
	if single, isString := v.(string); isString {
		return single
	}

	list, isList := v.([]interface{})
	if !isList || len(list) == 0 {
		return ""
	}

	// A non-string first element leaves the zero value "".
	first, _ := list[0].(string)
	return first
}
// normalizeLicense renders a license value as a JSON array string.
//
// Accepted shapes are a plain string, a single license object, or a list
// whose elements are strings or license objects. For an object the first
// string-valued field among spdxId, fullName and shortName is used; objects
// with none of them, and elements of any other type, contribute nothing.
// nil and unrecognized inputs yield "[]".
func normalizeLicense(v interface{}) string {
	if v == nil {
		return "[]"
	}

	// objectLabel picks the identifier for one license object.
	objectLabel := func(obj map[string]interface{}) (string, bool) {
		for _, key := range [...]string{"spdxId", "fullName", "shortName"} {
			if s, ok := obj[key].(string); ok {
				return s, true
			}
		}
		return "", false
	}

	names := []string{}

	// collect appends the label for a single string or object entry.
	collect := func(entry interface{}) {
		switch e := entry.(type) {
		case string:
			names = append(names, e)
		case map[string]interface{}:
			if s, ok := objectLabel(e); ok {
				names = append(names, s)
			}
		}
	}

	if list, isList := v.([]interface{}); isList {
		for _, entry := range list {
			collect(entry)
		}
	} else {
		collect(v)
	}

	encoded, _ := json.Marshal(names)
	return string(encoded)
}
// normalizePlatforms renders the string entries of v as a JSON array string.
// Non-string entries (complex platform specs) are skipped; a nil slice
// yields "[]".
func normalizePlatforms(v []interface{}) string {
	if v == nil {
		return "[]"
	}

	names := make([]string, 0, len(v))
	for i := range v {
		if s, isString := v[i].(string); isString {
			names = append(names, s)
		}
	}

	encoded, _ := json.Marshal(names)
	return string(encoded)
}
// normalizeMaintainers renders maintainer labels as a JSON array string.
// Each maintainer is represented by its name, falling back to "@<github>"
// when the name is empty; maintainers with neither are omitted.
// An empty or nil slice yields "[]".
func normalizeMaintainers(maintainers []Maintainer) string {
	if len(maintainers) == 0 {
		return "[]"
	}

	labels := make([]string, 0, len(maintainers))
	for i := range maintainers {
		label := maintainers[i].Name
		if label == "" {
			if handle := maintainers[i].Github; handle != "" {
				label = "@" + handle
			}
		}
		if label == "" {
			continue
		}
		labels = append(labels, label)
	}

	encoded, _ := json.Marshal(labels)
	return string(encoded)
}
// ParsePackagesStream parses packages from a reader using streaming to reduce
// memory usage. It yields parsed packages through a callback function.
//
// The input must be a single JSON object mapping attribute paths to package
// definitions. Entries are decoded one at a time and passed to callback in
// document order. An entry that is valid JSON but does not fit RawPackage
// (a type mismatch) is skipped. Malformed JSON, a read error, or a document
// that ends before its closing brace is reported as an error rather than
// being treated as the end of input.
//
// It returns the number of packages successfully passed to callback. If
// callback returns an error, parsing stops and that error is returned wrapped.
func ParsePackagesStream(r io.Reader, callback func(*ParsedPackage) error) (int, error) {
	dec := json.NewDecoder(r)

	// Read the opening brace
	t, err := dec.Token()
	if err != nil {
		return 0, fmt.Errorf("failed to read opening token: %w", err)
	}
	if delim, ok := t.(json.Delim); !ok || delim != '{' {
		return 0, fmt.Errorf("expected opening brace, got %v", t)
	}

	count := 0
	for dec.More() {
		// Read the key (attr path)
		t, err := dec.Token()
		if err != nil {
			return count, fmt.Errorf("failed to read attr path: %w", err)
		}
		attrPath, ok := t.(string)
		if !ok {
			return count, fmt.Errorf("expected string key, got %T", t)
		}

		// Read the value (package)
		var pkg RawPackage
		if err := dec.Decode(&pkg); err != nil {
			// A type mismatch is reported only after the whole value has been
			// read, so the stream is still positioned correctly: skip it.
			if _, mismatch := err.(*json.UnmarshalTypeError); mismatch {
				continue
			}
			// Anything else (syntax error, truncated input, read failure)
			// leaves the decoder unusable; report it where it happened.
			return count, fmt.Errorf("failed to decode package %s: %w", attrPath, err)
		}

		parsed := &ParsedPackage{
			AttrPath:        attrPath,
			Pname:           pkg.Pname,
			Version:         pkg.Version,
			Description:     pkg.Meta.Description,
			LongDescription: pkg.Meta.LongDescription,
			Homepage:        normalizeHomepage(pkg.Meta.Homepage),
			License:         normalizeLicense(pkg.Meta.License),
			Platforms:       normalizePlatforms(pkg.Meta.Platforms),
			Maintainers:     normalizeMaintainers(pkg.Meta.Maintainers),
			Broken:          pkg.Meta.Broken,
			Unfree:          pkg.Meta.Unfree,
			Insecure:        pkg.Meta.Insecure,
		}

		if err := callback(parsed); err != nil {
			return count, fmt.Errorf("callback error for %s: %w", attrPath, err)
		}
		count++
	}

	// More also reports false when the input simply ends, so confirm the
	// object was actually closed instead of accepting a truncated document.
	t, err = dec.Token()
	if err != nil {
		return count, fmt.Errorf("failed to read closing token: %w", err)
	}
	if delim, ok := t.(json.Delim); !ok || delim != '}' {
		return count, fmt.Errorf("expected closing brace, got %v", t)
	}

	return count, nil
}
// SplitAttrPath breaks an attribute path into its dot-separated components.
// For example, "python312Packages.requests" yields
// ["python312Packages", "requests"]; a path without dots yields a
// single-element slice.
func SplitAttrPath(attrPath string) []string {
	const separator = "."
	components := strings.Split(attrPath, separator)
	return components
}

View File

@@ -0,0 +1,215 @@
package packages
import (
"strings"
"testing"
)
// TestParsePackages feeds a two-package document through ParsePackages and
// checks the extracted fields, including license and homepage normalization.
func TestParsePackages(t *testing.T) {
	const fixture = `{
"firefox": {
"name": "firefox-120.0",
"pname": "firefox",
"version": "120.0",
"system": "x86_64-linux",
"meta": {
"description": "A web browser built from Firefox source tree",
"homepage": "https://www.mozilla.org/firefox/",
"license": {"spdxId": "MPL-2.0", "fullName": "Mozilla Public License 2.0"},
"maintainers": [
{"name": "John Doe", "github": "johndoe", "githubId": 12345}
],
"platforms": ["x86_64-linux", "aarch64-linux"]
}
},
"python312Packages.requests": {
"name": "python3.12-requests-2.31.0",
"pname": "requests",
"version": "2.31.0",
"system": "x86_64-linux",
"meta": {
"description": "HTTP library for Python",
"homepage": ["https://requests.readthedocs.io/"],
"license": [{"spdxId": "Apache-2.0"}],
"unfree": false
}
}
}`

	parsed, err := ParsePackages(strings.NewReader(fixture))
	if err != nil {
		t.Fatalf("ParsePackages failed: %v", err)
	}
	if n := len(parsed); n != 2 {
		t.Errorf("Expected 2 packages, got %d", n)
	}

	// Check firefox
	ff, found := parsed["firefox"]
	if !found {
		t.Fatal("firefox package not found")
	}
	if got := ff.Pname; got != "firefox" {
		t.Errorf("Expected pname 'firefox', got %q", got)
	}
	if got := ff.Version; got != "120.0" {
		t.Errorf("Expected version '120.0', got %q", got)
	}
	if got := ff.Homepage; got != "https://www.mozilla.org/firefox/" {
		t.Errorf("Expected homepage 'https://www.mozilla.org/firefox/', got %q", got)
	}
	if got := ff.License; got != `["MPL-2.0"]` {
		t.Errorf("Expected license '[\"MPL-2.0\"]', got %q", got)
	}

	// Check python requests
	req, found := parsed["python312Packages.requests"]
	if !found {
		t.Fatal("python312Packages.requests package not found")
	}
	if got := req.Pname; got != "requests" {
		t.Errorf("Expected pname 'requests', got %q", got)
	}
	// Homepage is an array here; the first element should be extracted.
	if got := req.Homepage; got != "https://requests.readthedocs.io/" {
		t.Errorf("Expected homepage 'https://requests.readthedocs.io/', got %q", got)
	}
}
// TestParsePackagesStream checks that the streaming parser invokes the
// callback once per package and reports the matching count.
func TestParsePackagesStream(t *testing.T) {
	const fixture = `{
"hello": {
"name": "hello-2.12",
"pname": "hello",
"version": "2.12",
"system": "x86_64-linux",
"meta": {
"description": "A program that produces a familiar, friendly greeting"
}
},
"world": {
"name": "world-1.0",
"pname": "world",
"version": "1.0",
"system": "x86_64-linux",
"meta": {}
}
}`

	var seen []*ParsedPackage
	collect := func(p *ParsedPackage) error {
		seen = append(seen, p)
		return nil
	}

	n, err := ParsePackagesStream(strings.NewReader(fixture), collect)
	if err != nil {
		t.Fatalf("ParsePackagesStream failed: %v", err)
	}
	if n != 2 {
		t.Errorf("Expected count 2, got %d", n)
	}
	if got := len(seen); got != 2 {
		t.Errorf("Expected 2 packages, got %d", got)
	}
}
// TestNormalizeLicense covers each license shape normalizeLicense accepts:
// nil, string, single object, and lists of strings or objects.
func TestNormalizeLicense(t *testing.T) {
	cases := []struct {
		name     string
		input    interface{}
		expected string
	}{
		{name: "nil", input: nil, expected: "[]"},
		{name: "string", input: "MIT", expected: `["MIT"]`},
		{name: "object with spdxId", input: map[string]interface{}{"spdxId": "MIT"}, expected: `["MIT"]`},
		{name: "object with fullName", input: map[string]interface{}{"fullName": "MIT License"}, expected: `["MIT License"]`},
		{name: "array of strings", input: []interface{}{"MIT", "Apache-2.0"}, expected: `["MIT","Apache-2.0"]`},
		{
			name: "array of objects",
			input: []interface{}{
				map[string]interface{}{"spdxId": "MIT"},
				map[string]interface{}{"spdxId": "Apache-2.0"},
			},
			expected: `["MIT","Apache-2.0"]`,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			if got := normalizeLicense(c.input); got != c.expected {
				t.Errorf("Expected %q, got %q", c.expected, got)
			}
		})
	}
}
// TestNormalizeHomepage covers nil, string, list and empty-list homepages.
func TestNormalizeHomepage(t *testing.T) {
	cases := []struct {
		name     string
		input    interface{}
		expected string
	}{
		{name: "nil", input: nil, expected: ""},
		{name: "string", input: "https://example.com", expected: "https://example.com"},
		{name: "array", input: []interface{}{"https://example.com", "https://docs.example.com"}, expected: "https://example.com"},
		{name: "empty array", input: []interface{}{}, expected: ""},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			if got := normalizeHomepage(c.input); got != c.expected {
				t.Errorf("Expected %q, got %q", c.expected, got)
			}
		})
	}
}
// TestNormalizeMaintainers covers empty input, named maintainers, the
// GitHub-handle fallback and multiple entries.
func TestNormalizeMaintainers(t *testing.T) {
	cases := []struct {
		name        string
		maintainers []Maintainer
		expected    string
	}{
		{name: "empty", maintainers: nil, expected: "[]"},
		{name: "with name", maintainers: []Maintainer{{Name: "John Doe"}}, expected: `["John Doe"]`},
		{name: "with github only", maintainers: []Maintainer{{Github: "johndoe"}}, expected: `["@johndoe"]`},
		{name: "multiple", maintainers: []Maintainer{{Name: "Alice"}, {Name: "Bob"}}, expected: `["Alice","Bob"]`},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			if got := normalizeMaintainers(c.maintainers); got != c.expected {
				t.Errorf("Expected %q, got %q", c.expected, got)
			}
		})
	}
}
// TestSplitAttrPath checks splitting of one-, two- and multi-component
// attribute paths.
func TestSplitAttrPath(t *testing.T) {
	cases := []struct {
		input    string
		expected []string
	}{
		{input: "firefox", expected: []string{"firefox"}},
		{input: "python312Packages.requests", expected: []string{"python312Packages", "requests"}},
		{input: "haskellPackages.aeson.components.library", expected: []string{"haskellPackages", "aeson", "components", "library"}},
	}

	for _, c := range cases {
		t.Run(c.input, func(t *testing.T) {
			got := SplitAttrPath(c.input)

			// Compare length first, then elements until the first difference.
			same := len(got) == len(c.expected)
			for i := 0; same && i < len(got); i++ {
				same = got[i] == c.expected[i]
			}
			if !same {
				t.Errorf("Expected %v, got %v", c.expected, got)
			}
		})
	}
}

View File

@@ -0,0 +1,78 @@
// Package packages contains types and logic for indexing Nix packages.
package packages
// RawPackage represents a package as parsed from nix-env --json output.
// It is the value type of PackagesFile; the parser in this package reads
// Pname, Version and Meta when building a ParsedPackage.
type RawPackage struct {
// Pname is the package name without the version.
Pname string `json:"pname"`
// Version is the package version string.
Version string `json:"version"`
// System is decoded from the "system" key (e.g. "x86_64-linux" in the tests).
System string `json:"system"`
// Meta holds the package metadata object.
Meta RawPackageMeta `json:"meta"`
// Name is decoded from the "name" key (e.g. "firefox-120.0" in the tests).
Name string `json:"name"`
OutputName string `json:"outputName,omitempty"`
// Outputs is kept loosely typed; its value shape is not inspected by the parser.
Outputs map[string]interface{} `json:"outputs,omitempty"`
}
// RawPackageMeta contains package metadata as found under the "meta" key.
// Homepage, License and Platforms are loosely typed because their JSON shape
// varies between packages; the normalize* helpers in the parser convert them
// to plain strings or JSON-array strings.
type RawPackageMeta struct {
Available bool `json:"available,omitempty"`
Broken bool `json:"broken,omitempty"`
Description string `json:"description,omitempty"`
Homepage interface{} `json:"homepage,omitempty"` // Can be string or []string
Insecure bool `json:"insecure,omitempty"`
License interface{} `json:"license,omitempty"` // Can be string, object, or []interface{}
LongDescription string `json:"longDescription,omitempty"`
Maintainers []Maintainer `json:"maintainers,omitempty"`
Name string `json:"name,omitempty"`
OutputsToInstall []string `json:"outputsToInstall,omitempty"`
Platforms []interface{} `json:"platforms,omitempty"` // Can be strings or objects
Position string `json:"position,omitempty"`
Unfree bool `json:"unfree,omitempty"`
}
// Maintainer represents a package maintainer entry from the package metadata.
// When rendering maintainers the parser uses Name, falling back to
// "@" + Github when Name is empty.
type Maintainer struct {
Email string `json:"email,omitempty"`
// Github is the maintainer's GitHub handle, without a leading "@".
Github string `json:"github,omitempty"`
GithubID int `json:"githubId,omitempty"`
Matrix string `json:"matrix,omitempty"`
// Name is the maintainer's display name.
Name string `json:"name,omitempty"`
}
// ParsedPackage represents a package ready for database storage.
// It is the normalized form of a RawPackage: variable-shape metadata has been
// reduced to plain strings, with list-valued fields encoded as JSON arrays.
type ParsedPackage struct {
// AttrPath is the key the package had in the nix-env JSON object.
AttrPath string
Pname string
Version string
Description string
LongDescription string
// Homepage is a single URL; for list-valued homepages the first entry is kept.
Homepage string
License string // JSON array
Platforms string // JSON array
Maintainers string // JSON array
Broken bool
Unfree bool
Insecure bool
}
// PackagesFile represents the top-level structure of nix-env JSON output.
// It's a map from attr path to package definition. ParsePackages decodes a
// whole document into this type in one step.
type PackagesFile map[string]RawPackage
// ChannelAliases maps friendly channel names to their git branch/ref patterns.
// These are the same as NixOS options since packages come from the same repo.
//
// Most entries map a channel name to itself; "nixos-stable" is an alias that
// is hard-coded to a specific release branch.
// NOTE(review): the "nixos-stable" target presumably needs a manual bump when
// a new stable release becomes current — confirm the release process.
var ChannelAliases = map[string]string{
"nixos-unstable": "nixos-unstable",
"nixos-stable": "nixos-24.11",
"nixos-24.11": "nixos-24.11",
"nixos-24.05": "nixos-24.05",
"nixos-23.11": "nixos-23.11",
"nixos-23.05": "nixos-23.05",
}
// IndexResult contains the results of a package indexing operation.
type IndexResult struct {
// RevisionID is the database ID of the revision the packages were stored under.
RevisionID int64
// PackageCount is the number of packages processed for the revision.
PackageCount int
// Duration holds a time.Duration value (the elapsed indexing time).
// The original rationale for the interface type was "to avoid import cycle".
// NOTE(review): importing the standard library "time" package should not be
// able to create a cycle, so this could presumably be typed as
// time.Duration — confirm no caller depends on the interface type first.
Duration interface{}
AlreadyIndexed bool // True if revision already has packages
}