This repository has been archived on 2026-03-10. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
labmcp/internal/homemanager/indexer.go
Torjus Håkestad 4ae92b4f85 chore: migrate module path from git.t-juice.club to code.t-juice.club
Update Go module path and all import references for Gitea to Forgejo
host migration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 19:48:25 +01:00

429 lines
12 KiB
Go

package homemanager
import (
"archive/tar"
"compress/gzip"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"
"code.t-juice.club/torjus/labmcp/internal/database"
"code.t-juice.club/torjus/labmcp/internal/nixos"
"code.t-juice.club/torjus/labmcp/internal/options"
)
// revisionPattern is the allow-list applied to revision strings before they
// are used, guarding against injection. A revision must be 1 to 64 characters
// long and may contain only ASCII letters, digits, dots, underscores and
// hyphens, which covers channel names such as "release-24.11" as well as git
// hashes.
var revisionPattern = regexp.MustCompile(
	`^[a-zA-Z0-9._-]{1,64}$`,
)
// Indexer handles indexing of home-manager revisions: it builds and parses
// the options of a revision, and can separately index the files of the
// revision's source tarball.
type Indexer struct {
// store is the persistence backend for revisions, options, declarations
// and files.
store database.Store
// httpClient performs the GitHub requests (commit lookup and tarball
// download).
httpClient *http.Client
}
// NewIndexer returns a Home Manager indexer that persists into store. The
// indexer gets its own HTTP client whose requests time out after five minutes.
func NewIndexer(store database.Store) *Indexer {
	client := &http.Client{Timeout: 5 * time.Minute}

	idx := new(Indexer)
	idx.store = store
	idx.httpClient = client
	return idx
}
// IndexResult contains the results of an indexing operation.
//
// NOTE(review): IndexRevision and ReindexRevision in this file return
// options.IndexResult rather than this type — confirm whether this
// declaration is still needed.
type IndexResult struct {
// Revision is the revision record the result refers to.
Revision *database.Revision
// OptionCount is the number of options belonging to the revision.
OptionCount int
// FileCount is the number of files belonging to the revision.
FileCount int
// Duration is how long the indexing operation took.
Duration time.Duration
// AlreadyIndexed is true if the revision was already indexed and the
// operation was therefore skipped.
AlreadyIndexed bool
}
// ValidateRevision reports whether revision is safe to use. It returns nil
// when the string matches revisionPattern and a descriptive error when it
// contains any other character or has an unacceptable length.
func ValidateRevision(revision string) error {
	if revisionPattern.MatchString(revision) {
		return nil
	}
	return fmt.Errorf("invalid revision format: must be 1-64 alphanumeric characters, hyphens, underscores, or dots")
}
// IndexRevision indexes the options of a home-manager revision, given either
// a git hash or a channel name.
//
// The revision is validated and then resolved through ResolveRevision. If the
// store already holds the resolved ref, that record is returned with
// AlreadyIndexed set and nothing is rebuilt. Otherwise options.json is built
// with nix, parsed, and written to the store under a new revision record. If
// writing the options fails, the new record is deleted again on a best-effort
// basis. A failed commit-date lookup is not fatal; the current time is
// recorded instead.
func (idx *Indexer) IndexRevision(ctx context.Context, revision string) (*options.IndexResult, error) {
	began := time.Now()

	// Reject unsafe input before it reaches any URL or Nix expression.
	if err := ValidateRevision(revision); err != nil {
		return nil, err
	}

	// Channel names are translated to the git ref they stand for.
	gitRef := idx.ResolveRevision(revision)

	// Short-circuit when this ref has been indexed before.
	known, err := idx.store.GetRevision(ctx, gitRef)
	switch {
	case err != nil:
		return nil, fmt.Errorf("failed to check existing revision: %w", err)
	case known != nil:
		skipped := &options.IndexResult{
			Revision:       known,
			OptionCount:    known.OptionCount,
			Duration:       time.Since(began),
			AlreadyIndexed: true,
		}
		return skipped, nil
	}

	// Produce options.json with nix; the temp dir goes away when we return.
	jsonPath, removeTmp, err := idx.buildOptions(ctx, gitRef)
	if err != nil {
		return nil, fmt.Errorf("failed to build options: %w", err)
	}
	defer removeTmp()

	// The file has the same format as the NixOS one, so the nixos parser is reused.
	jsonFile, err := os.Open(jsonPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open options.json: %w", err)
	}
	defer jsonFile.Close() //nolint:errcheck // read-only file

	parsed, err := nixos.ParseOptions(jsonFile)
	if err != nil {
		return nil, fmt.Errorf("failed to parse options: %w", err)
	}

	// The commit date is informational only; fall back to "now" if unavailable.
	committedAt, err := idx.getCommitDate(ctx, gitRef)
	if err != nil {
		committedAt = time.Now()
	}

	record := &database.Revision{
		GitHash:     gitRef,
		ChannelName: idx.GetChannelName(revision),
		CommitDate:  committedAt,
		OptionCount: len(parsed),
	}
	if err := idx.store.CreateRevision(ctx, record); err != nil {
		return nil, fmt.Errorf("failed to create revision: %w", err)
	}

	if err := idx.storeOptions(ctx, record.ID, parsed); err != nil {
		// Do not leave a revision record behind without its options.
		_ = idx.store.DeleteRevision(ctx, record.ID) //nolint:errcheck // best-effort cleanup
		return nil, fmt.Errorf("failed to store options: %w", err)
	}

	indexed := &options.IndexResult{
		Revision:    record,
		OptionCount: len(parsed),
		Duration:    time.Since(began),
	}
	return indexed, nil
}
// ReindexRevision indexes revision from scratch. Any record the store holds
// for the resolved ref is deleted first, after which the work is delegated to
// IndexRevision.
func (idx *Indexer) ReindexRevision(ctx context.Context, revision string) (*options.IndexResult, error) {
	// Reject unsafe input before touching the store.
	if err := ValidateRevision(revision); err != nil {
		return nil, err
	}

	gitRef := idx.ResolveRevision(revision)

	stale, err := idx.store.GetRevision(ctx, gitRef)
	if err != nil {
		return nil, fmt.Errorf("failed to check existing revision: %w", err)
	}
	if stale != nil {
		// Drop the old data so IndexRevision does not short-circuit.
		if delErr := idx.store.DeleteRevision(ctx, stale.ID); delErr != nil {
			return nil, fmt.Errorf("failed to delete existing revision: %w", delErr)
		}
	}

	return idx.IndexRevision(ctx, revision)
}
// buildOptions builds options.json for a home-manager revision with nix-build.
//
// ref is interpolated into the home-manager tarball URL inside a Nix
// expression, so callers must pass a validated ref. nixpkgs is always taken
// from the nixos-unstable archive.
//
// On success it returns the path to options.json inside the resulting store
// path, plus a cleanup function that removes the temporary working directory.
// On failure the temporary directory has already been removed and the
// returned cleanup function is nil. Errors wrap their underlying cause so
// callers can inspect it with errors.Is / errors.As.
func (idx *Indexer) buildOptions(ctx context.Context, ref string) (string, func(), error) {
	// nix-build is run from a scratch directory.
	tmpDir, err := os.MkdirTemp("", "hm-options-*")
	if err != nil {
		return "", nil, fmt.Errorf("failed to create temp dir: %w", err)
	}
	cleanup := func() {
		_ = os.RemoveAll(tmpDir) //nolint:errcheck // best-effort temp dir cleanup
	}

	// Build options.json using nix-build.
	// This evaluates the Home Manager options from the specified revision.
	nixExpr := fmt.Sprintf(`
let
hm = builtins.fetchTarball {
url = "https://github.com/nix-community/home-manager/archive/%s.tar.gz";
};
nixpkgs = builtins.fetchTarball {
url = "https://github.com/NixOS/nixpkgs/archive/nixos-unstable.tar.gz";
};
pkgs = import nixpkgs { config = {}; };
lib = import (hm + "/modules/lib/stdlib-extended.nix") pkgs.lib;
docs = import (hm + "/docs") { inherit pkgs lib; release = "24.11"; isReleaseBranch = false; };
in docs.options.json
`, ref)

	cmd := exec.CommandContext(ctx, "nix-build", "--no-out-link", "-E", nixExpr)
	cmd.Dir = tmpDir
	output, err := cmd.Output()
	if err != nil {
		cleanup()
		if exitErr, ok := err.(*exec.ExitError); ok {
			// Keep the exit error in the chain and append nix-build's own
			// diagnostics, which Output captured in Stderr.
			return "", nil, fmt.Errorf("nix-build failed: %w: %s", exitErr, string(exitErr.Stderr))
		}
		return "", nil, fmt.Errorf("nix-build failed: %w", err)
	}

	// The output is the store path containing share/doc/home-manager/options.json.
	storePath := strings.TrimSpace(string(output))
	optionsPath := filepath.Join(storePath, "share", "doc", "home-manager", "options.json")
	if _, err := os.Stat(optionsPath); err != nil {
		cleanup()
		// Wrap the stat error: it may be something other than "does not exist".
		return "", nil, fmt.Errorf("options.json not found at %s: %w", optionsPath, err)
	}

	return optionsPath, cleanup, nil
}
// storeOptions writes the parsed options of one revision to the database.
// Options are inserted in batches of 1000. Afterwards the declarations of
// each option are inserted, one batch per option, tagged with that option's ID.
func (idx *Indexer) storeOptions(ctx context.Context, revisionID int64, opts map[string]*nixos.ParsedOption) error {
	const batchSize = 1000

	// rows[i] and declSets[i] always describe the same option.
	rows := make([]*database.Option, 0, len(opts))
	declSets := make([][]*database.Declaration, 0, len(opts))
	for name, parsed := range opts {
		rows = append(rows, &database.Option{
			RevisionID:   revisionID,
			Name:         name,
			ParentPath:   database.ParentPath(name),
			Type:         parsed.Type,
			DefaultValue: parsed.Default,
			Example:      parsed.Example,
			Description:  parsed.Description,
			ReadOnly:     parsed.ReadOnly,
		})

		set := make([]*database.Declaration, 0, len(parsed.Declarations))
		for _, filePath := range parsed.Declarations {
			set = append(set, &database.Declaration{FilePath: filePath})
		}
		declSets = append(declSets, set)
	}

	// Insert the options chunk by chunk.
	for pending := rows; len(pending) > 0; {
		n := batchSize
		if n > len(pending) {
			n = len(pending)
		}
		if err := idx.store.CreateOptionsBatch(ctx, pending[:n]); err != nil {
			return fmt.Errorf("failed to store options batch: %w", err)
		}
		pending = pending[n:]
	}

	// Attach each declaration to its option via the option's ID, then insert.
	for i, row := range rows {
		set := declSets[i]
		if len(set) == 0 {
			continue
		}
		for _, decl := range set {
			decl.OptionID = row.ID
		}
		if err := idx.store.CreateDeclarationsBatch(ctx, set); err != nil {
			return fmt.Errorf("failed to store declarations for %s: %w", row.Name, err)
		}
	}

	return nil
}
// getCommitDate looks up the committer date of ref in the
// nix-community/home-manager repository through the GitHub commits API.
// Any transport failure, non-200 status or undecodable body is returned as an
// error together with the zero time.
func (idx *Indexer) getCommitDate(ctx context.Context, ref string) (time.Time, error) {
	// commitPayload mirrors only the part of the response that is needed.
	type commitPayload struct {
		Commit struct {
			Committer struct {
				Date time.Time `json:"date"`
			} `json:"committer"`
		} `json:"commit"`
	}

	var none time.Time

	endpoint := fmt.Sprintf("https://api.github.com/repos/nix-community/home-manager/commits/%s", ref)
	request, err := http.NewRequestWithContext(ctx, "GET", endpoint, nil)
	if err != nil {
		return none, err
	}
	request.Header.Set("Accept", "application/vnd.github.v3+json")

	response, err := idx.httpClient.Do(request)
	if err != nil {
		return none, err
	}
	defer response.Body.Close() //nolint:errcheck // response body read-only

	if status := response.StatusCode; status != http.StatusOK {
		return none, fmt.Errorf("GitHub API returned %d", status)
	}

	payload := new(commitPayload)
	if err := json.NewDecoder(response.Body).Decode(payload); err != nil {
		return none, err
	}
	return payload.Commit.Committer.Date, nil
}
// ResolveRevision maps a channel alias to the git ref recorded for it in
// ChannelAliases. Anything that is not a known alias is returned unchanged.
func (idx *Indexer) ResolveRevision(revision string) string {
	ref, isAlias := ChannelAliases[revision]
	if !isAlias {
		return revision
	}
	return ref
}
// GetChannelName returns the channel name that corresponds to revision, or
// the empty string if there is none.
//
// If revision is itself a key of ChannelAliases it is returned as is.
// Otherwise revision is treated as a ref and looked up among the alias
// targets. Map iteration order is unspecified, so when several aliases point
// at the same ref the lexicographically smallest name is chosen to keep the
// result stable across calls.
func (idx *Indexer) GetChannelName(revision string) string {
	if _, ok := ChannelAliases[revision]; ok {
		return revision
	}

	// Reverse lookup: find the aliases whose target is this ref.
	best, found := "", false
	for name, ref := range ChannelAliases {
		if ref != revision {
			continue
		}
		if !found || name < best {
			best, found = name, true
		}
	}
	return best
}
// IndexFiles downloads the home-manager source tarball for ref from GitHub
// and stores its files under revisionID. This is a separate operation that
// can be run after IndexRevision.
//
// ref is checked with ValidateRevision before it is placed in the download
// URL, matching the other exported entry points. Only regular files whose
// extension is enabled in AllowedExtensions and whose size is at most 1 MiB
// are kept. The archive's top-level directory is stripped from the stored
// paths. Files are written to the store in batches of 100.
//
// It returns the number of files accepted for storage. When an error is
// returned, the count covers the files accepted up to that point, which may
// include files of a batch that failed to store.
func (idx *Indexer) IndexFiles(ctx context.Context, revisionID int64, ref string) (int, error) {
	const (
		batchSize   = 100
		maxFileSize = 1024 * 1024 // files larger than 1 MiB are skipped
	)

	// ref ends up in a URL path; reject anything outside the allow-list.
	if err := ValidateRevision(ref); err != nil {
		return 0, err
	}

	// Download home-manager tarball.
	url := fmt.Sprintf("https://github.com/nix-community/home-manager/archive/%s.tar.gz", ref)
	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return 0, fmt.Errorf("failed to create request: %w", err)
	}
	resp, err := idx.httpClient.Do(req)
	if err != nil {
		return 0, fmt.Errorf("failed to download tarball: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck // response body read-only
	if resp.StatusCode != http.StatusOK {
		return 0, fmt.Errorf("download failed with status %d", resp.StatusCode)
	}

	// Extract and index files straight from the response stream.
	gz, err := gzip.NewReader(resp.Body)
	if err != nil {
		return 0, fmt.Errorf("failed to create gzip reader: %w", err)
	}
	defer gz.Close() //nolint:errcheck // gzip reader read-only

	tr := tar.NewReader(gz)
	count := 0
	batch := make([]*database.File, 0, batchSize)
	for {
		header, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return count, fmt.Errorf("tar read error: %w", err)
		}

		// Only regular files are indexed (skips directories, links, ...).
		if header.Typeflag != tar.TypeReg {
			continue
		}

		// Check file extension.
		ext := filepath.Ext(header.Name)
		if !AllowedExtensions[ext] {
			continue
		}

		// Skip very large files.
		if header.Size > maxFileSize {
			continue
		}

		// Remove the top-level directory (home-manager-<hash>/).
		path := header.Name
		if i := strings.Index(path, "/"); i >= 0 {
			path = path[i+1:]
		}

		// Read content. A read error leaves the sequential tar stream
		// unusable (the next call to Next reports the same failure), so
		// surface it here together with the file name instead of continuing.
		content, err := io.ReadAll(tr)
		if err != nil {
			return count, fmt.Errorf("failed to read %s: %w", header.Name, err)
		}

		batch = append(batch, &database.File{
			RevisionID: revisionID,
			FilePath:   path,
			Extension:  ext,
			Content:    string(content),
		})
		count++

		// Store in batches.
		if len(batch) >= batchSize {
			if err := idx.store.CreateFilesBatch(ctx, batch); err != nil {
				return count, fmt.Errorf("failed to store files batch: %w", err)
			}
			batch = batch[:0]
		}
	}

	// Store remaining files.
	if len(batch) > 0 {
		if err := idx.store.CreateFilesBatch(ctx, batch); err != nil {
			return count, fmt.Errorf("failed to store final files batch: %w", err)
		}
	}

	return count, nil
}