This repository has been archived on 2026-03-10. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
labmcp/internal/packages/indexer.go
Torjus Håkestad 4ae92b4f85 chore: migrate module path from git.t-juice.club to code.t-juice.club
Update Go module path and all import references for Gitea to Forgejo
host migration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 19:48:25 +01:00

258 lines
7.2 KiB
Go

package packages
import (
"context"
"encoding/json"
"fmt"
"net/http"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"
"code.t-juice.club/torjus/labmcp/internal/database"
)
// revisionPattern is the allow-list that ValidateRevision applies to revision
// strings to prevent injection attacks.
//
// A match consists of 1 to 64 characters drawn only from ASCII letters,
// digits, dots, underscores and hyphens, which covers channel names such as
// "nixos-24.11" as well as git hashes. The pattern is anchored at both ends,
// so the entire string has to match.
var revisionPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]{1,64}$`)
// Indexer handles indexing of packages from nixpkgs revisions.
//
// Package rows are written through store; httpClient is used for the GitHub
// API lookup performed by GetCommitDate.
type Indexer struct {
// store receives the package batches and the per-revision package count.
store database.Store
// httpClient issues the GitHub API request made by GetCommitDate.
httpClient *http.Client
}
// NewIndexer returns a packages Indexer that persists its results through
// store.
//
// The indexer gets its own HTTP client with a 10-minute timeout, chosen to be
// long because of package evaluation.
func NewIndexer(store database.Store) *Indexer {
	client := &http.Client{Timeout: 10 * time.Minute}

	indexer := new(Indexer)
	indexer.store = store
	indexer.httpClient = client
	return indexer
}
// ValidateRevision reports whether revision is safe to use.
//
// It returns nil when revision matches revisionPattern and a descriptive
// error otherwise, i.e. when the string is empty, too long, or contains a
// character outside the allowed set.
func ValidateRevision(revision string) error {
	if revisionPattern.MatchString(revision) {
		return nil
	}
	return fmt.Errorf("invalid revision format: must be 1-64 alphanumeric characters, hyphens, underscores, or dots")
}
// IndexPackages indexes the packages of the nixpkgs tree at ref and attaches
// them to an existing revision.
//
// The revision identified by revisionID must already exist in the database
// (created by options indexer). ref is checked with ValidateRevision before
// anything else happens.
//
// The package list produced by buildPackages is parsed as a stream and written
// to the store in batches of 1000, after which the revision's package count is
// updated. The temporary directory holding the package list is removed when
// the function returns.
//
// On success the result carries the revision ID, the number of packages seen
// and the elapsed time. Any failure is returned as an error with a nil result.
func (idx *Indexer) IndexPackages(ctx context.Context, revisionID int64, ref string) (*IndexResult, error) {
	began := time.Now()

	// Refuse anything that is not a plain channel name or hash up front.
	if err := ValidateRevision(ref); err != nil {
		return nil, err
	}

	// Evaluate the tree with nix-env; the result lands in a temp directory.
	jsonPath, removeTmp, err := idx.buildPackages(ctx, ref)
	if err != nil {
		return nil, fmt.Errorf("failed to build packages: %w", err)
	}
	defer removeTmp()

	src, err := os.Open(jsonPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open packages.json: %w", err)
	}
	defer src.Close() //nolint:errcheck // read-only file

	const batchSize = 1000
	var (
		pending = make([]*database.Package, 0, batchSize)
		total   int
	)

	// onPackage converts one parsed entry, queues it, and flushes the queue to
	// the store whenever a full batch has accumulated.
	onPackage := func(parsed *ParsedPackage) error {
		total++
		pending = append(pending, &database.Package{
			RevisionID:      revisionID,
			AttrPath:        parsed.AttrPath,
			Pname:           parsed.Pname,
			Version:         parsed.Version,
			Description:     parsed.Description,
			LongDescription: parsed.LongDescription,
			Homepage:        parsed.Homepage,
			License:         parsed.License,
			Platforms:       parsed.Platforms,
			Maintainers:     parsed.Maintainers,
			Broken:          parsed.Broken,
			Unfree:          parsed.Unfree,
			Insecure:        parsed.Insecure,
		})
		if len(pending) < batchSize {
			return nil
		}
		if storeErr := idx.store.CreatePackagesBatch(ctx, pending); storeErr != nil {
			return fmt.Errorf("failed to store packages batch: %w", storeErr)
		}
		// Keep the backing array for the next batch.
		pending = pending[:0]
		return nil
	}

	// Streaming keeps memory usage bounded by the batch size.
	if _, err = ParsePackagesStream(src, onPackage); err != nil {
		return nil, fmt.Errorf("failed to parse packages: %w", err)
	}

	// Whatever did not fill a whole batch still has to be written.
	if len(pending) != 0 {
		if err := idx.store.CreatePackagesBatch(ctx, pending); err != nil {
			return nil, fmt.Errorf("failed to store final packages batch: %w", err)
		}
	}

	if err := idx.store.UpdateRevisionPackageCount(ctx, revisionID, total); err != nil {
		return nil, fmt.Errorf("failed to update package count: %w", err)
	}

	result := &IndexResult{
		RevisionID:   revisionID,
		PackageCount: total,
		Duration:     time.Since(began),
	}
	return result, nil
}
// buildPackages builds a JSON file containing all packages for a nixpkgs
// revision.
//
// It runs two external commands inside a fresh temporary directory:
//
//  1. nix-instantiate, to fetch the GitHub tarball for ref into the nix store;
//  2. nix-env -qaP --json --meta against the fetched tree, with stdout
//     redirected into packages.json.
//
// ref is interpolated verbatim into a nix expression, so callers must have
// checked it with ValidateRevision first (IndexPackages does).
//
// On success it returns the path of packages.json together with a cleanup
// function that removes the temporary directory; the caller is responsible
// for calling it. On failure the temporary directory has already been removed,
// the returned path is empty and the cleanup function is nil.
func (idx *Indexer) buildPackages(ctx context.Context, ref string) (string, func(), error) {
	tmpDir, err := os.MkdirTemp("", "nixpkgs-packages-*")
	if err != nil {
		return "", nil, fmt.Errorf("failed to create temp dir: %w", err)
	}
	cleanup := func() {
		_ = os.RemoveAll(tmpDir) //nolint:errcheck // best-effort temp dir cleanup
	}

	// Every failure path below has to remove the temp dir; do it in one place
	// instead of repeating the call before each return.
	succeeded := false
	defer func() {
		if !succeeded {
			cleanup()
		}
	}()

	outputPath := filepath.Join(tmpDir, "packages.json")

	// First, fetch the nixpkgs tarball to the nix store.
	// This ensures it's available for nix-env evaluation.
	nixExpr := fmt.Sprintf(`
builtins.fetchTarball {
url = "https://github.com/NixOS/nixpkgs/archive/%s.tar.gz";
}
`, ref)
	fetchCmd := exec.CommandContext(ctx, "nix-instantiate", "--eval", "-E", nixExpr)
	fetchCmd.Dir = tmpDir
	fetchOutput, err := fetchCmd.Output()
	if err != nil {
		// Output captures stderr into ExitError.Stderr because fetchCmd.Stderr
		// was left nil; include it when there is any.
		if exitErr, isExit := err.(*exec.ExitError); isExit {
			if msg := strings.TrimSpace(string(exitErr.Stderr)); msg != "" {
				return "", nil, fmt.Errorf("nix-instantiate fetch failed: %w: %s", err, msg)
			}
		}
		return "", nil, fmt.Errorf("nix-instantiate fetch failed: %w", err)
	}

	// The output is the store path in quotes, e.g., "/nix/store/xxx-source".
	nixpkgsPath := strings.Trim(strings.TrimSpace(string(fetchOutput)), "\"")
	if nixpkgsPath == "" {
		return "", nil, fmt.Errorf("nix-instantiate fetch returned an empty store path")
	}

	// Run nix-env to get all packages as JSON.
	// Use --json --meta to get full metadata.
	cmd := exec.CommandContext(ctx, "nix-env",
		"-f", nixpkgsPath,
		"-qaP", "--json", "--meta",
	)
	cmd.Dir = tmpDir

	outputFile, err := os.Create(outputPath)
	if err != nil {
		return "", nil, fmt.Errorf("failed to create output file: %w", err)
	}
	cmd.Stdout = outputFile

	// Warnings about unfree/broken packages are not forwarded anywhere, but
	// stderr is kept in memory so a failure can be reported with its cause.
	// ExitError.Stderr is only filled in by Cmd.Output, never by Cmd.Run, so it
	// cannot be used here.
	var stderr strings.Builder
	cmd.Stderr = &stderr

	runErr := cmd.Run()
	closeErr := outputFile.Close()
	if runErr != nil {
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return "", nil, fmt.Errorf("nix-env failed: %w: %s", runErr, msg)
		}
		return "", nil, fmt.Errorf("nix-env failed: %w", runErr)
	}
	if closeErr != nil {
		// A failed close on a file we just wrote can mean lost data.
		return "", nil, fmt.Errorf("failed to close output file: %w", closeErr)
	}

	// Verify output file exists and has content.
	stat, err := os.Stat(outputPath)
	if err != nil || stat.Size() == 0 {
		return "", nil, fmt.Errorf("packages.json not found or empty")
	}

	succeeded = true
	return outputPath, cleanup, nil
}
// ResolveRevision maps a channel name to the git ref it stands for.
//
// When revision is a key of ChannelAliases the aliased ref is returned; any
// other value is passed through unchanged.
func (idx *Indexer) ResolveRevision(revision string) string {
	aliased, known := ChannelAliases[revision]
	if !known {
		return revision
	}
	return aliased
}
// GetChannelName returns the channel name if the revision matches one.
//
// A revision that is itself a key of ChannelAliases is returned as is.
// Otherwise the aliases are searched for one whose ref equals revision. If
// several aliases point at the same ref, the lexicographically smallest name
// is returned so that the result does not depend on map iteration order.
//
// It returns the empty string when revision matches no channel.
func (idx *Indexer) GetChannelName(revision string) string {
	if _, ok := ChannelAliases[revision]; ok {
		return revision
	}

	// Check if the revision is a channel ref value. Map iteration order is
	// unspecified, so every entry is visited and the smallest matching name
	// kept instead of returning the first hit.
	match := ""
	for name, ref := range ChannelAliases {
		if ref != revision {
			continue
		}
		if match == "" || name < match {
			match = name
		}
	}
	return match
}
// GetCommitDate gets the commit date for a git ref using GitHub API.
//
// It requests the commit for ref from the NixOS/nixpkgs repository and returns
// the committer date found in the response. The request is bound to ctx and
// sent with the indexer's HTTP client.
//
// ref is placed into the URL path as is; callers are expected to pass an
// already validated ref.
//
// An error is returned when the request cannot be built or sent, when GitHub
// answers with a status other than 200, or when the response body cannot be
// decoded. Underlying errors are wrapped and remain available to errors.Is
// and errors.As.
func (idx *Indexer) GetCommitDate(ctx context.Context, ref string) (time.Time, error) {
	endpoint := fmt.Sprintf("https://api.github.com/repos/NixOS/nixpkgs/commits/%s", ref)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return time.Time{}, fmt.Errorf("failed to create GitHub request: %w", err)
	}
	req.Header.Set("Accept", "application/vnd.github.v3+json")

	resp, err := idx.httpClient.Do(req)
	if err != nil {
		return time.Time{}, fmt.Errorf("failed to query GitHub API: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck // response body read-only

	if resp.StatusCode != http.StatusOK {
		return time.Time{}, fmt.Errorf("GitHub API returned %d", resp.StatusCode)
	}

	// Only the committer date is needed; every other field is ignored.
	var commit struct {
		Commit struct {
			Committer struct {
				Date time.Time `json:"date"`
			} `json:"committer"`
		} `json:"commit"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&commit); err != nil {
		return time.Time{}, fmt.Errorf("failed to decode GitHub commit response: %w", err)
	}
	return commit.Commit.Committer.Date, nil
}