package nixos import ( "archive/tar" "compress/gzip" "context" "encoding/json" "fmt" "io" "net/http" "os" "os/exec" "path/filepath" "regexp" "strings" "time" "git.t-juice.club/torjus/labmcp/internal/database" "git.t-juice.club/torjus/labmcp/internal/options" ) // revisionPattern validates revision strings to prevent injection attacks. // Allows: alphanumeric, hyphens, underscores, dots (for channel names like "nixos-24.11" // and git hashes). Must be 1-64 characters. var revisionPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]{1,64}$`) // Indexer handles indexing of nixpkgs revisions. type Indexer struct { store database.Store httpClient *http.Client } // NewIndexer creates a new indexer. func NewIndexer(store database.Store) *Indexer { return &Indexer{ store: store, httpClient: &http.Client{ Timeout: 5 * time.Minute, }, } } // IndexResult contains the results of an indexing operation. // Deprecated: Use options.IndexResult instead. type IndexResult = options.IndexResult // ValidateRevision checks if a revision string is safe to use. // Returns an error if the revision contains potentially dangerous characters. func ValidateRevision(revision string) error { if !revisionPattern.MatchString(revision) { return fmt.Errorf("invalid revision format: must be 1-64 alphanumeric characters, hyphens, underscores, or dots") } return nil } // IndexRevision indexes a nixpkgs revision by git hash or channel name. func (idx *Indexer) IndexRevision(ctx context.Context, revision string) (*IndexResult, error) { start := time.Now() // Validate revision to prevent injection attacks if err := ValidateRevision(revision); err != nil { return nil, err } // Resolve channel names to git refs ref := resolveRevision(revision) // Check if already indexed existing, err := idx.store.GetRevision(ctx, ref) if err != nil { return nil, fmt.Errorf("failed to check existing revision: %w", err) } if existing != nil { return &IndexResult{ Revision: existing, OptionCount: existing.OptionCount, Duration: time.Since(start), AlreadyIndexed: true, }, nil } // Build options.json using nix optionsPath, cleanup, err := idx.buildOptions(ctx, ref) if err != nil { return nil, fmt.Errorf("failed to build options: %w", err) } defer cleanup() // Parse options.json optionsFile, err := os.Open(optionsPath) if err != nil { return nil, fmt.Errorf("failed to open options.json: %w", err) } defer optionsFile.Close() options, err := ParseOptions(optionsFile) if err != nil { return nil, fmt.Errorf("failed to parse options: %w", err) } // Get commit info commitDate, err := idx.getCommitDate(ctx, ref) if err != nil { // Non-fatal, use current time commitDate = time.Now() } // Create revision record rev := &database.Revision{ GitHash: ref, ChannelName: getChannelName(revision), CommitDate: commitDate, OptionCount: len(options), } if err := idx.store.CreateRevision(ctx, rev); err != nil { return nil, fmt.Errorf("failed to create revision: %w", err) } // Store options if err := idx.storeOptions(ctx, rev.ID, options); err != nil { // Cleanup on failure idx.store.DeleteRevision(ctx, rev.ID) return nil, fmt.Errorf("failed to store options: %w", err) } return &IndexResult{ Revision: rev, OptionCount: len(options), Duration: time.Since(start), }, nil } // ReindexRevision forces re-indexing of a revision, deleting existing data first. func (idx *Indexer) ReindexRevision(ctx context.Context, revision string) (*IndexResult, error) { // Validate revision to prevent injection attacks if err := ValidateRevision(revision); err != nil { return nil, err } ref := resolveRevision(revision) // Delete existing revision if present existing, err := idx.store.GetRevision(ctx, ref) if err != nil { return nil, fmt.Errorf("failed to check existing revision: %w", err) } if existing != nil { if err := idx.store.DeleteRevision(ctx, existing.ID); err != nil { return nil, fmt.Errorf("failed to delete existing revision: %w", err) } } // Now index fresh return idx.IndexRevision(ctx, revision) } // buildOptions builds options.json for a nixpkgs revision. func (idx *Indexer) buildOptions(ctx context.Context, ref string) (string, func(), error) { // Create temp directory tmpDir, err := os.MkdirTemp("", "nixos-options-*") if err != nil { return "", nil, fmt.Errorf("failed to create temp dir: %w", err) } cleanup := func() { os.RemoveAll(tmpDir) } // Build options.json using nix-build // This evaluates the NixOS options from the specified nixpkgs revision nixExpr := fmt.Sprintf(` let nixpkgs = builtins.fetchTarball { url = "https://github.com/NixOS/nixpkgs/archive/%s.tar.gz"; }; pkgs = import nixpkgs { config = {}; }; eval = import (nixpkgs + "/nixos/lib/eval-config.nix") { modules = []; system = "x86_64-linux"; }; opts = (pkgs.nixosOptionsDoc { options = eval.options; }).optionsJSON; in opts `, ref) cmd := exec.CommandContext(ctx, "nix-build", "--no-out-link", "-E", nixExpr) cmd.Dir = tmpDir output, err := cmd.Output() if err != nil { cleanup() if exitErr, ok := err.(*exec.ExitError); ok { return "", nil, fmt.Errorf("nix-build failed: %s", string(exitErr.Stderr)) } return "", nil, fmt.Errorf("nix-build failed: %w", err) } // The output is the store path containing share/doc/nixos/options.json storePath := strings.TrimSpace(string(output)) optionsPath := filepath.Join(storePath, "share", "doc", "nixos", "options.json") if _, err := os.Stat(optionsPath); err != nil { cleanup() return "", nil, fmt.Errorf("options.json not found at %s", optionsPath) } return optionsPath, cleanup, nil } // storeOptions stores parsed options in the database. func (idx *Indexer) storeOptions(ctx context.Context, revisionID int64, options map[string]*ParsedOption) error { // Prepare batch of options opts := make([]*database.Option, 0, len(options)) declsByName := make(map[string][]*database.Declaration) for name, opt := range options { dbOpt := &database.Option{ RevisionID: revisionID, Name: name, ParentPath: database.ParentPath(name), Type: opt.Type, DefaultValue: opt.Default, Example: opt.Example, Description: opt.Description, ReadOnly: opt.ReadOnly, } opts = append(opts, dbOpt) // Prepare declarations for this option decls := make([]*database.Declaration, 0, len(opt.Declarations)) for _, path := range opt.Declarations { decls = append(decls, &database.Declaration{ FilePath: path, }) } declsByName[name] = decls } // Store options in batches batchSize := 1000 for i := 0; i < len(opts); i += batchSize { end := i + batchSize if end > len(opts) { end = len(opts) } batch := opts[i:end] if err := idx.store.CreateOptionsBatch(ctx, batch); err != nil { return fmt.Errorf("failed to store options batch: %w", err) } } // Store declarations for _, opt := range opts { decls := declsByName[opt.Name] for _, decl := range decls { decl.OptionID = opt.ID } if len(decls) > 0 { if err := idx.store.CreateDeclarationsBatch(ctx, decls); err != nil { return fmt.Errorf("failed to store declarations for %s: %w", opt.Name, err) } } } return nil } // getCommitDate gets the commit date for a git ref. func (idx *Indexer) getCommitDate(ctx context.Context, ref string) (time.Time, error) { // Use GitHub API to get commit info url := fmt.Sprintf("https://api.github.com/repos/NixOS/nixpkgs/commits/%s", ref) req, err := http.NewRequestWithContext(ctx, "GET", url, nil) if err != nil { return time.Time{}, err } req.Header.Set("Accept", "application/vnd.github.v3+json") resp, err := idx.httpClient.Do(req) if err != nil { return time.Time{}, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return time.Time{}, fmt.Errorf("GitHub API returned %d", resp.StatusCode) } var commit struct { Commit struct { Committer struct { Date time.Time `json:"date"` } `json:"committer"` } `json:"commit"` } if err := json.NewDecoder(resp.Body).Decode(&commit); err != nil { return time.Time{}, err } return commit.Commit.Committer.Date, nil } // ResolveRevision resolves a channel name or ref to a git ref. func (idx *Indexer) ResolveRevision(revision string) string { // Check if it's a known channel alias if ref, ok := ChannelAliases[revision]; ok { return ref } return revision } // GetChannelName returns the channel name if the revision matches one. func (idx *Indexer) GetChannelName(revision string) string { if _, ok := ChannelAliases[revision]; ok { return revision } // Check if the revision is a channel ref value for name, ref := range ChannelAliases { if ref == revision { return name } } return "" } // resolveRevision is a helper that calls the method. func resolveRevision(revision string) string { // Check if it's a known channel alias if ref, ok := ChannelAliases[revision]; ok { return ref } return revision } // getChannelName is a helper that returns the channel name. func getChannelName(revision string) string { if _, ok := ChannelAliases[revision]; ok { return revision } // Check if the revision is a channel ref value for name, ref := range ChannelAliases { if ref == revision { return name } } return "" } // IndexFiles indexes files from a nixpkgs tarball. // This is a separate operation that can be run after IndexRevision. func (idx *Indexer) IndexFiles(ctx context.Context, revisionID int64, ref string) (int, error) { // Download nixpkgs tarball url := fmt.Sprintf("https://github.com/NixOS/nixpkgs/archive/%s.tar.gz", ref) req, err := http.NewRequestWithContext(ctx, "GET", url, nil) if err != nil { return 0, fmt.Errorf("failed to create request: %w", err) } resp, err := idx.httpClient.Do(req) if err != nil { return 0, fmt.Errorf("failed to download tarball: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return 0, fmt.Errorf("download failed with status %d", resp.StatusCode) } // Extract and index files gz, err := gzip.NewReader(resp.Body) if err != nil { return 0, fmt.Errorf("failed to create gzip reader: %w", err) } defer gz.Close() tr := tar.NewReader(gz) count := 0 batch := make([]*database.File, 0, 100) for { header, err := tr.Next() if err == io.EOF { break } if err != nil { return count, fmt.Errorf("tar read error: %w", err) } // Skip directories if header.Typeflag != tar.TypeReg { continue } // Check file extension ext := filepath.Ext(header.Name) if !AllowedExtensions[ext] { continue } // Skip very large files (> 1MB) if header.Size > 1024*1024 { continue } // Remove the top-level directory (nixpkgs-/) path := header.Name if idx := strings.Index(path, "/"); idx >= 0 { path = path[idx+1:] } // Read content content, err := io.ReadAll(tr) if err != nil { continue } file := &database.File{ RevisionID: revisionID, FilePath: path, Extension: ext, Content: string(content), } batch = append(batch, file) count++ // Store in batches if len(batch) >= 100 { if err := idx.store.CreateFilesBatch(ctx, batch); err != nil { return count, fmt.Errorf("failed to store files batch: %w", err) } batch = batch[:0] } } // Store remaining files if len(batch) > 0 { if err := idx.store.CreateFilesBatch(ctx, batch); err != nil { return count, fmt.Errorf("failed to store final files batch: %w", err) } } return count, nil }